/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
                                stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                                misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
                          count, kind, stmt_info, misalign, where);
}
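
/* Illustrative usage sketch (not from the original sources): a caller
   costing one unaligned vector store into a cost vector might do

     stmt_vector_for_cost body_costs;
     body_costs.create (0);
     unsigned cost = record_stmt_cost (&body_costs, 1, unaligned_store,
                                       stmt_info, DR_MISALIGNMENT (dr),
                                       vect_body);

   With a non-NULL cost vector the entry is merely queued for later
   processing; passing NULL instead hands the cost directly to the
   target's add_stmt_cost hook.  */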
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple *use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
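
/* Illustrative example (hypothetical scalar loop, not from the original
   sources):

     s = 0;
     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   // relevant: it has a vdef (alters memory)
         s = s + b[i];      // live: s is used after the loop
       }
     use (s);
*/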
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms: -1- ARRAY_REF = var or -2- var = ARRAY_REF.
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  return (operand == use);
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
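
/* For illustration (hypothetical, not from the original sources): given
   "x = a[i]", the use of 'i' only feeds the address computation, so case 1
   applies and the def-stmt of 'i' keeps its current marking; given
   "x = i + 1", the def-stmt of 'i' inherits STMT's liveness/relevance.  */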
static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }
  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */
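
      /* Illustrative example (not from the original sources): in a sum
         reduction "s += a[i]", the stmts computing the partial sums are
         used only by the reduction, so the order in which their results
         are combined may be changed; such stmts get
         vect_used_by_reduction.  */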
      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");

              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");

              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }
      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, live_p, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL,
                                                 &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
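
/* Worked example (illustrative, not from the original sources): promoting
   V16QI to V4SI takes two steps (QI -> HI, HI -> SI), i.e. PWR = 1, so the
   loop below costs vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote operations.  */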
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple *first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
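  /* For example (illustrative): interleaving a group of 4 stores with
     ncopies = 1 costs ceil_log2 (4) * 4 = 8 permute statements.  */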
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple *first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
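
/* For example (illustrative): calling vect_init_vector with VAL = 5 and
   TYPE = V4SI emits "cst_N = { 5, 5, 5, 5 };" in the loop preheader and
   returns the new SSA name cst_N.  */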
tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  gimple *def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt =  ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0     VS1.1
                        VS1.1: vx.1 = memref1     VS1.2
                        VS1.2: vx.2 = memref2     VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ... VSnew.1
                        VSnew.1: vz1 = vx.1 + ... VSnew.2
                        VSnew.2: vz2 = vx.2 + ... VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}


static tree permute_vec_elements (tree, tree, tree, gimple *,
                                  gimple_stmt_iterator *);
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
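
/* For illustration (hypothetical GIMPLE, not from the original sources):
   if-conversion turns a conditional load "x = c ? p[i] : 0" into
     x_1 = MASK_LOAD (ptr_2, align, mask_3);
   and a conditional store into
     MASK_STORE (ptr_4, align, mask_3, val_5);
   which are the internal-fn calls handled here.  */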
static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  gimple *new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple *ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple *def_stmt;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);
  if (TYPE_PRECISION (TREE_TYPE (mask))
      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_P (stmt_info))
    return false;

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
        = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "masked gather with integer mask not supported.");
          return false;
        }
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
                                 ? STMT_VINFO_DR_STEP (stmt_info)
                                 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
           || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
        vect_model_store_cost (stmt_info, ncopies, false, dt,
                               NULL, NULL, NULL);
      else
        vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
          for (i = 0; i < nunits; ++i)
            sel[i] = i | gather_off_nunits;
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
        }
      else
        gcc_unreachable ();

      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              else
                {
                  vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
                }

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                              == TYPE_VECTOR_SUBPARTS (masktype));
                  var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mask_op = var;
                }
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
                                 scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              tree rhs = gimple_call_arg (stmt, 3);
              vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              /* We should have caught mismatched types earlier.  */
              gcc_assert (useless_type_conversion_p (vectype,
                                                     TREE_TYPE (vec_rhs)));
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
              vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                          gimple_call_arg (stmt, 1),
                                          vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt);
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false, &inv_p);
	      gcc_assert (!inv_p);
	    }
	  else
	    {
	      vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }

	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
	  new_stmt
	    = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
					  gimple_call_arg (stmt, 1),
					  vec_mask);
	  gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (i == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }

  if (!is_store)
    {
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
	 from the IL.  */
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
    }

  return true;
}
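/* For example, assuming 4-lane vectors and VF == 4, a conditional access
   that reaches this function as the scalar internal call

       _5 = .MASK_LOAD (p_3, <align>, mask_4);

   leaves it as roughly

       vect__5 = .MASK_LOAD (vect_p, <align>, vect_mask);

   with the scalar lhs redefined to zero above purely so that DCE can
   delete the original statement.  */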
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
		   slp_tree slp_node)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
					 slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }
  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument types differ.\n");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument vector types differ.\n");
	  return false;
	}
    }
  /* If all arguments are external or constant defs, use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
	  && loop_vinfo
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
	     { 0, 1, 2, ... vf - 1 } vector.  */
	  gcc_assert (nargs == 0);
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "function is not vectorizable.\n");
	  return false;
	}
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
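  /* E.g., with VF == 8 and 4-lane input and output vectors (modifier
     == NONE), ncopies == 8 / 4 == 2: two vector calls cover the eight
     scalar iterations.  */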
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
			 "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[k] = vec_oprndsk[i];
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt);
	      else
		{
		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	    }

	  if (gimple_call_internal_p (stmt)
	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
	    {
	      tree *v = XALLOCAVEC (tree, nunits_out);
	      int k;
	      for (k = 0; k < nunits_out; ++k)
		v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
	      tree cst = build_vector (vectype_out, v);
	      tree new_var
		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
	      gimple *init_stmt = gimple_build_assign (new_var, cst);
	      vect_init_vector_1 (stmt, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = gimple_build_assign (new_temp, new_var);
	    }
	  else
	    {
	      new_stmt = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (new_stmt, new_temp);
	    }
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      break;
    case NARROW:
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  new_stmt = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);

  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
	 with vf - 1 rather than 0, i.e. the last iteration of the
	 vectorized loop.  */
      imm_use_iterator iter;
      use_operand_p use_p;
      gimple *use_stmt;
      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
	{
	  basic_block use_bb = gimple_bb (use_stmt);
	  if (use_bb
	      && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
					       ncopies * nunits_out - 1));
	      update_stmt (use_stmt);
	    }
	}
    }

  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
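/* As an illustration, assuming the target advertises a vector variant of
   sqrtf through its builtin_vectorized_function hook, a loop statement

       a[i] = sqrtf (b[i]);

   is rewritten here (VF == 4) into approximately

       vect_b = <load 4 floats>;
       vect_a = target_sqrt_v4sf (vect_b);    <-- FNDECL found above
       <store vect_a>;

   where target_sqrt_v4sf stands for whatever function decl the target
   hook returned.  */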
/* Per-argument info used by vectorizable_simd_clone_call.  */

struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (is_gimple_call (def_stmt)
	       && gimple_call_internal_p (def_stmt)
	       && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	{
	  if (!linear_step)
	    linear_step = 1;
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
      else
	return;
    }
}
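/* The shape recognized above corresponds to address computations like

       _1 = GOMP_SIMD_LANE (simduid.0_8(D));
       _2 = _1 * 4;
       op_3 = &array + _2;

   i.e. OP is linear in the simd lane with step 4; the base and step are
   recorded so the caller can treat such an argument as a linear one.  */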
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  gimple *def_stmt;
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<simd_call_arg_info> arginfo = vNULL;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;
  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    return false;
  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.create (nargs);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
			       &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  arginfo.release ();
	  return false;
	}

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
	gcc_assert (thisarginfo.vectype != NULL_TREE);
      /* For linear arguments, the analyze phase should have saved
	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
	{
	  gcc_assert (vec_stmt);
	  thisarginfo.linear_step
	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
	  thisarginfo.op
	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
	  thisarginfo.simd_lane_linear
	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
	       == boolean_true_node);
	  /* If the loop has been peeled for alignment, we need to adjust
	     it.  */
	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
	    {
	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
	      tree opt = TREE_TYPE (thisarginfo.op);
	      bias = fold_convert (TREE_TYPE (step), bias);
	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
	      thisarginfo.op
		= fold_build2 (POINTER_TYPE_P (opt)
			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
			       thisarginfo.op, bias);
	    }
	}
      else if (!vec_stmt
	       && thisarginfo.dt != vect_constant_def
	       && thisarginfo.dt != vect_external_def
	       && loop_vinfo
	       && TREE_CODE (op) == SSA_NAME
	       && simple_iv (loop, loop_containing_stmt (stmt), op,
			     &iv, false)
	       && tree_fits_shwi_p (iv.step))
	{
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
	}
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
	  && !thisarginfo.linear_step
	  && !vec_stmt
	  && thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && loop_vinfo
	  && !slp_node
	  && TREE_CODE (op) == SSA_NAME)
	vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }
  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
      {
	unsigned int this_badness = 0;
	if (n->simdclone->simdlen
	    > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	    || n->simdclone->nargs != nargs)
	  continue;
	if (n->simdclone->simdlen
	    < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
	  this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
			   - exact_log2 (n->simdclone->simdlen)) * 1024;
	if (n->simdclone->inbranch)
	  this_badness += 2048;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	  continue;
	this_badness += target_badness * 512;
	/* FORNOW: Have to add code to add the mask argument.  */
	if (n->simdclone->inbranch)
	  continue;
	for (i = 0; i < nargs; i++)
	  {
	    switch (n->simdclone->args[i].arg_type)
	      {
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
			(n->simdclone->args[i].orig_type,
			 TREE_TYPE (gimple_call_arg (stmt, i))))
		  i = -1;
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
		  this_badness += 64;
		break;
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
		/* FORNOW */
		i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_MASK:
		gcc_unreachable ();
	      }
	    if (i == (size_t) -1)
	      break;
	    if (n->simdclone->args[i].alignment > arginfo[i].align)
	      {
		i = -1;
		break;
	      }
	    if (arginfo[i].align)
	      this_badness += (exact_log2 (arginfo[i].align)
			       - exact_log2 (n->simdclone->args[i].alignment));
	  }
	if (i == (size_t) -1)
	  continue;
	if (bestn == NULL || this_badness < badness)
	  {
	    bestn = n;
	    badness = this_badness;
	  }
      }
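  /* E.g., with a vectorization factor of 8, an unmasked clone of simdlen 4
     scores 1024 (one step of log2 difference) while an inbranch clone of
     simdlen 8 scores 2048, so the simdlen-4 clone would be preferred, all
     else being equal; the lowest-badness usable clone is kept in BESTN.  */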
  if (bestn == NULL)
    {
      arginfo.release ();
      return false;
    }

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
	 || arginfo[i].dt == vect_external_def)
	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
	arginfo[i].vectype
	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
								     i)));
	if (arginfo[i].vectype == NULL
	    || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
		> bestn->simdclone->simdlen))
	  {
	    arginfo.release ();
	    return false;
	  }
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where the
     user has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    {
      arginfo.release ();
      return false;
    }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
	if (bestn->simdclone->args[i].arg_type
	    == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
	  {
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
								      + 1);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
		       ? size_type_node : TREE_TYPE (arginfo[i].op);
	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
	    tree sll = arginfo[i].simd_lane_linear
		       ? boolean_true_node : boolean_false_node;
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
	  }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
	{
	  ratype = rtype;
	  rtype = TREE_TYPE (ratype);
	}
    }
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
	vargs.create (nargs);
      else
	vargs.truncate (0);

      for (i = 0; i < nargs; i++)
	{
	  unsigned int k, l, m, o;
	  tree atype;
	  op = gimple_call_arg (stmt, i);
	  switch (bestn->simdclone->args[i].arg_type)
	    {
	    case SIMD_CLONE_ARG_TYPE_VECTOR:
	      atype = bestn->simdclone->args[i].vector_type;
	      o = nunits / TYPE_VECTOR_SUBPARTS (atype);
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (TYPE_VECTOR_SUBPARTS (atype)
		      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
		    {
		      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
		      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
			   / TYPE_VECTOR_SUBPARTS (atype));
		      gcc_assert ((k & (k - 1)) == 0);
		      if (m == 0)
			vec_oprnd0
			  = vect_get_vec_def_for_operand (op, stmt);
		      else
			{
			  vec_oprnd0 = arginfo[i].op;
			  if ((m & (k - 1)) == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
								vec_oprnd0);
			}
		      arginfo[i].op = vec_oprnd0;
		      vec_oprnd0
			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
				  size_int (prec),
				  bitsize_int ((m & (k - 1)) * prec));
		      new_stmt
			= gimple_build_assign (make_ssa_name (atype),
					       vec_oprnd0);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
		    }
		  else
		    {
		      k = (TYPE_VECTOR_SUBPARTS (atype)
			   / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
		      gcc_assert ((k & (k - 1)) == 0);
		      vec<constructor_elt, va_gc> *ctor_elts;
		      if (k != 1)
			vec_alloc (ctor_elts, k);
		      else
			ctor_elts = NULL;
		      for (l = 0; l < k; l++)
			{
			  if (m == 0 && l == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_operand (op, stmt);
			  else
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
								arginfo[i].op);
			  arginfo[i].op = vec_oprnd0;
			  if (k == 1)
			    break;
			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
						  vec_oprnd0);
			}
		      if (k == 1)
			vargs.safe_push (vec_oprnd0);
		      else
			{
			  vec_oprnd0 = build_constructor (atype, ctor_elts);
			  new_stmt
			    = gimple_build_assign (make_ssa_name (atype),
						   vec_oprnd0);
			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
			  vargs.safe_push (gimple_assign_lhs (new_stmt));
			}
		    }
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
	      vargs.safe_push (op);
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      if (j == 0)
		{
		  gimple_seq stmts;
		  arginfo[i].op
		    = force_gimple_operand (arginfo[i].op, &stmts, true,
					    NULL_TREE);
		  if (stmts != NULL)
		    {
		      basic_block new_bb;
		      edge pe = loop_preheader_edge (loop);
		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
		      gcc_assert (!new_bb);
		    }
		  if (arginfo[i].simd_lane_linear)
		    {
		      vargs.safe_push (arginfo[i].op);
		      break;
		    }
		  tree phi_res = copy_ssa_name (op);
		  gphi *new_phi = create_phi_node (phi_res, loop->header);
		  set_vinfo_for_stmt (new_phi,
				      new_stmt_vec_info (new_phi, loop_vinfo));
		  add_phi_arg (new_phi, arginfo[i].op,
			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       ncopies * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  tree phi_arg = copy_ssa_name (op);
		  new_stmt
		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo));
		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
			       UNKNOWN_LOCATION);
		  arginfo[i].op = phi_res;
		  vargs.safe_push (phi_res);
		}
	      else
		{
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       j * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  new_temp = make_ssa_name (TREE_TYPE (op));
		  new_stmt = gimple_build_assign (new_temp, code,
						  arginfo[i].op, tcst);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  vargs.safe_push (new_temp);
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	    default:
	      gcc_unreachable ();
	    }
	}
      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
	{
	  gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
	  if (ratype)
	    new_temp = create_tmp_var (ratype);
	  else if (TYPE_VECTOR_SUBPARTS (vectype)
		   == TYPE_VECTOR_SUBPARTS (rtype))
	    new_temp = make_ssa_name (vec_dest, new_stmt);
	  else
	    new_temp = make_ssa_name (rtype, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	}
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
	{
	  if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
	    {
	      unsigned int k, l;
	      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
	      k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
	      gcc_assert ((k & (k - 1)) == 0);
	      for (l = 0; l < k; l++)
		{
		  tree t;
		  if (ratype)
		    {
		      t = build_fold_addr_expr (new_temp);
		      t = build2 (MEM_REF, vectype, t,
				  build_int_cst (TREE_TYPE (t),
						 l * prec / BITS_PER_UNIT));
		    }
		  else
		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
				size_int (prec), bitsize_int (l * prec));
		  new_stmt
		    = gimple_build_assign (make_ssa_name (vectype), t);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (j == 0 && l == 0)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}

	      if (ratype)
		{
		  tree clobber = build_constructor (ratype, NULL);
		  TREE_THIS_VOLATILE (clobber) = 1;
		  new_stmt = gimple_build_assign (new_temp, clobber);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      continue;
	    }
	  else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
	    {
	      unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
				/ TYPE_VECTOR_SUBPARTS (rtype));
	      gcc_assert ((k & (k - 1)) == 0);
	      if ((j & (k - 1)) == 0)
		vec_alloc (ret_ctor_elts, k);
	      if (ratype)
		{
		  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
		  for (m = 0; m < o; m++)
		    {
		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
					 size_int (m), NULL_TREE, NULL_TREE);
		      new_stmt
			= gimple_build_assign (make_ssa_name (rtype), tem);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
					      gimple_assign_lhs (new_stmt));
		    }
		  tree clobber = build_constructor (ratype, NULL);
		  TREE_THIS_VOLATILE (clobber) = 1;
		  new_stmt = gimple_build_assign (new_temp, clobber);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
	      if ((j & (k - 1)) != k - 1)
		continue;
	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
	      new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      if ((unsigned) j == k - 1)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	      continue;
	    }
	  else if (ratype)
	    {
	      tree t = build_fold_addr_expr (new_temp);
	      t = build2 (MEM_REF, vectype, t,
			  build_int_cst (TREE_TYPE (t), 0));
	      new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest), t);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      tree clobber = build_constructor (ratype, NULL);
	      TREE_THIS_VOLATILE (clobber) = 1;
	      vect_finish_stmt_generation (stmt,
					   gimple_build_assign (new_temp,
								clobber), gsi);
	    }
	}

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
	lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}
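/* To illustrate, with "#pragma omp declare simd" on foo and a selected
   clone of simdlen 4 taking two vector arguments, a statement

       c[i] = foo (a[i], b[i]);

   becomes approximately

       vect_c = _ZGVbN4vv_foo (vect_a, vect_b);

   using the vector-ABI mangled clone name; the exact clone chosen and the
   argument shapes follow the arg_type handling above.  */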
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple *stmt)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support.  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support.  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
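/* E.g., widening a V8HI multiplication into V4SI results uses this helper
   twice, once with the low-part and once with the high-part tree code
   (VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR), each call producing
   half of the widened elements.  */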
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
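/* Each invocation pushes two defs and recurses, so a call with
   MULTI_STEP_CVT == n collects 2 * (n + 1) vector defs in total; the
   NARROW caller passes vect_pow2 (multi_step_cvt) - 1, i.e. it gathers
   the 2^(multi_step_cvt+1) operands a multi-step demotion consumes.  */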
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple *stmt,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple *new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
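/* For instance, demoting V4SI to V16QI (multi_step_cvt == 1) first packs
   pairs of V4SI into V8HI, then the recursive call packs pairs of V8HI
   into V16QI with VEC_PACK_TRUNC_EXPR, reusing VEC_OPRNDS in place for
   the intermediate results.  */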
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple *stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of the promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  machine_mode rhs_mode;
  unsigned short fltsz;
  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);
  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
	  && (TYPE_PRECISION (rhs_type)
	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."
			 "\n");
      return false;
    }
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
      else
	ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);

      if (!ok)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operations can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
	{
	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    break;
	}

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
	goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }
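  /* To illustrate the three modifiers: int -> float on equal-width lanes
     is NONE and needs a single conversion code; float -> double is WIDEN,
     splitting each input vector via a lo/hi pair of codes; double -> float
     is NARROW, packing two input vectors into one, possibly through
     intermediate types when no direct instruction exists.  */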
  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);
  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
			       -1);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt, i.e. we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
					 NULL, slp_node, -1);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node, -1);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = gimple_build_assign (new_temp, codecvt1,
						      vop0);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt, i.e. we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node, -1);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest);
		    new_stmt = gimple_build_assign (new_temp, codecvt1,
						    vop0);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }
  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vec_oprnd;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
	  || ((TYPE_PRECISION (TREE_TYPE (op))
	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
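/* A typical statement handled here is a plain copy or a same-width
   conversion such as

       int_7 = (int) unsigned_6;

   which becomes a single vector assignment, the conversion degenerating
   into a VIEW_CONVERT_EXPR of the operand vector.  */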
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
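/* This predicate is used, e.g., by the pattern recognizers to check that
   a shift they are about to introduce (say RSHIFT_EXPR on int) has either
   a vector/scalar or a vector/vector optab handler before committing to
   the pattern.  */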
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int vf;
4283 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4286 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4289 /* Is STMT a vectorizable binary/unary operation? */
4290 if (!is_gimple_assign (stmt))
4293 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4296 code = gimple_assign_rhs_code (stmt);
4298 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4299 || code == RROTATE_EXPR))
4302 scalar_dest = gimple_assign_lhs (stmt);
4303 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4304 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4305 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4307 if (dump_enabled_p ())
4308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4309 "bit-precision shifts not supported.\n");
4313 op0 = gimple_assign_rhs1 (stmt);
4314 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4316 if (dump_enabled_p ())
4317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4318 "use not simple.\n");
4321 /* If op0 is an external or constant def, use a vector type with
4322 the same size as the output vector type. */
4324 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4326 gcc_assert (vectype);
4329 if (dump_enabled_p ())
4330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4331 "no vectype for scalar type\n");
4335 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4336 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4337 if (nunits_out != nunits_in)
4340 op1 = gimple_assign_rhs2 (stmt);
4341 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4343 if (dump_enabled_p ())
4344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4345 "use not simple.\n");
4350 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4354 /* Multiple types in SLP are handled by creating the appropriate number of
4355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4357 if (slp_node || PURE_SLP_STMT (stmt_info))
4360 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4362 gcc_assert (ncopies >= 1);
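/* Editorial worked example: with a vectorization factor of 8 and a
   V4SI input vector type (nunits_in == 4), NCOPIES is 8 / 4 = 2,
   i.e. two vector shift stmts are generated per scalar stmt.  */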
4364 /* Determine whether the shift amount is a vector or a scalar. If the
4365 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4367 if ((dt[1] == vect_internal_def
4368 || dt[1] == vect_induction_def)
4370 scalar_shift_arg = false;
4371 else if (dt[1] == vect_constant_def
4372 || dt[1] == vect_external_def
4373 || dt[1] == vect_internal_def)
4375 /* In SLP, we need to check whether the shift count is the same;
4376 in loops, if it is a constant or invariant, it is always
4380 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4383 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4384 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4385 scalar_shift_arg = false;
4390 if (dump_enabled_p ())
4391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4392 "operand mode requires invariant argument.\n");
4396 /* Vector shifted by vector. */
4397 if (!scalar_shift_arg)
4399 optab = optab_for_tree_code (code, vectype, optab_vector);
4400 if (dump_enabled_p ())
4401 dump_printf_loc (MSG_NOTE, vect_location,
4402 "vector/vector shift/rotate found.\n");
4405 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4406 if (op1_vectype == NULL_TREE
4407 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4409 if (dump_enabled_p ())
4410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4411 "unusable type for last operand in"
4412 " vector/vector shift/rotate.\n");
4416 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
4417 see if it has a vector-shifted-by-vector insn. */
4420 optab = optab_for_tree_code (code, vectype, optab_scalar);
4422 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4424 if (dump_enabled_p ())
4425 dump_printf_loc (MSG_NOTE, vect_location,
4426 "vector/scalar shift/rotate found.\n");
4430 optab = optab_for_tree_code (code, vectype, optab_vector);
4432 && (optab_handler (optab, TYPE_MODE (vectype))
4433 != CODE_FOR_nothing))
4435 scalar_shift_arg = false;
4437 if (dump_enabled_p ())
4438 dump_printf_loc (MSG_NOTE, vect_location,
4439 "vector/vector shift/rotate found.\n");
4441 /* Unlike the other binary operators, shifts/rotates have
4442 an rhs of type int rather than of the same type as the lhs,
4443 so make sure the scalar has the right type if we are
4444 dealing with vectors of long long/long/short/char. */
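/* Editorial example: a V8HI vector shifted by the int constant 3
   needs the constant folded to TREE_TYPE (vectype), i.e. short int,
   so that both operands of the vector/vector shift have matching
   element modes.  */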
4445 if (dt[1] == vect_constant_def)
4446 op1 = fold_convert (TREE_TYPE (vectype), op1);
4447 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4451 && TYPE_MODE (TREE_TYPE (vectype))
4452 != TYPE_MODE (TREE_TYPE (op1)))
4454 if (dump_enabled_p ())
4455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4456 "unusable type for last operand in"
4457 " vector/vector shift/rotate.\n");
4460 if (vec_stmt && !slp_node)
4462 op1 = fold_convert (TREE_TYPE (vectype), op1);
4463 op1 = vect_init_vector (stmt, op1,
4464 TREE_TYPE (vectype), NULL);
4471 /* Supportable by target? */
4474 if (dump_enabled_p ())
4475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4479 vec_mode = TYPE_MODE (vectype);
4480 icode = (int) optab_handler (optab, vec_mode);
4481 if (icode == CODE_FOR_nothing)
4483 if (dump_enabled_p ())
4484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4485 "op not supported by target.\n");
4486 /* Check only during analysis. */
4487 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4488 || (vf < vect_min_worthwhile_factor (code)
4491 if (dump_enabled_p ())
4492 dump_printf_loc (MSG_NOTE, vect_location,
4493 "proceeding using word mode.\n");
4496 /* Worthwhile without SIMD support? Check only during analysis. */
4497 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4498 && vf < vect_min_worthwhile_factor (code)
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4503 "not worthwhile without SIMD support.\n");
4507 if (!vec_stmt) /* transformation not required. */
4509 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4510 if (dump_enabled_p ())
4511 dump_printf_loc (MSG_NOTE, vect_location,
4512 "=== vectorizable_shift ===\n");
4513 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4519 if (dump_enabled_p ())
4520 dump_printf_loc (MSG_NOTE, vect_location,
4521 "transform binary/unary operation.\n");
4524 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4526 prev_stmt_info = NULL;
4527 for (j = 0; j < ncopies; j++)
4532 if (scalar_shift_arg)
4534 /* Vector shl and shr insn patterns can be defined with scalar
4535 operand 2 (shift operand). In this case, use the constant or
4536 loop-invariant op1 directly, without extending it to vector mode
4538 optab_op2_mode = insn_data[icode].operand[2].mode;
4539 if (!VECTOR_MODE_P (optab_op2_mode))
4541 if (dump_enabled_p ())
4542 dump_printf_loc (MSG_NOTE, vect_location,
4543 "operand 1 using scalar mode.\n");
4545 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4546 vec_oprnds1.quick_push (vec_oprnd1);
4549 /* Store vec_oprnd1 for every vector stmt to be created
4550 for SLP_NODE. We check during the analysis that all
4551 the shift arguments are the same.
4552 TODO: Allow different constants for different vector
4553 stmts generated for an SLP instance. */
4554 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4555 vec_oprnds1.quick_push (vec_oprnd1);
4560 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4561 (a special case for certain kinds of vector shifts); otherwise,
4562 operand 1 should be of a vector type (the usual case). */
4564 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4567 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4571 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4573 /* Arguments are ready. Create the new vector stmt. */
4574 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4576 vop1 = vec_oprnds1[i];
4577 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4578 new_temp = make_ssa_name (vec_dest, new_stmt);
4579 gimple_assign_set_lhs (new_stmt, new_temp);
4580 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4582 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4589 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4591 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4592 prev_stmt_info = vinfo_for_stmt (new_stmt);
4595 vec_oprnds0.release ();
4596 vec_oprnds1.release ();
4602 /* Function vectorizable_operation.
4604 Check if STMT performs a binary, unary or ternary operation that can
4606 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4607 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4608 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4611 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4612 gimple **vec_stmt, slp_tree slp_node)
4616 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4617 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4619 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4620 enum tree_code code;
4621 machine_mode vec_mode;
4625 bool target_support_p;
4627 enum vect_def_type dt[3]
4628 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4629 gimple *new_stmt = NULL;
4630 stmt_vec_info prev_stmt_info;
4636 vec<tree> vec_oprnds0 = vNULL;
4637 vec<tree> vec_oprnds1 = vNULL;
4638 vec<tree> vec_oprnds2 = vNULL;
4639 tree vop0, vop1, vop2;
4640 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4641 vec_info *vinfo = stmt_info->vinfo;
4644 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4647 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4650 /* Is STMT a vectorizable binary/unary operation? */
4651 if (!is_gimple_assign (stmt))
4654 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4657 code = gimple_assign_rhs_code (stmt);
4659 /* For pointer addition, we should use the normal plus for
4660 the vector addition. */
4661 if (code == POINTER_PLUS_EXPR)
4664 /* Support only unary, binary and ternary operations. */
4665 op_type = TREE_CODE_LENGTH (code);
4666 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4668 if (dump_enabled_p ())
4669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4670 "num. args = %d (not unary/binary/ternary op).\n",
4675 scalar_dest = gimple_assign_lhs (stmt);
4676 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4678 /* Most operations cannot handle bit-precision types without extra
4680 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4681 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4682 /* Exceptions are bitwise binary operations. */
4683 && code != BIT_IOR_EXPR
4684 && code != BIT_XOR_EXPR
4685 && code != BIT_AND_EXPR)
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4689 "bit-precision arithmetic not supported.\n");
4693 op0 = gimple_assign_rhs1 (stmt);
4694 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4696 if (dump_enabled_p ())
4697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4698 "use not simple.\n");
4701 /* If op0 is an external or constant def, use a vector type with
4702 the same size as the output vector type. */
4704 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4706 gcc_assert (vectype);
4709 if (dump_enabled_p ())
4711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4712 "no vectype for scalar type ");
4713 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4715 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4721 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4722 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4723 if (nunits_out != nunits_in)
4726 if (op_type == binary_op || op_type == ternary_op)
4728 op1 = gimple_assign_rhs2 (stmt);
4729 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4733 "use not simple.\n");
4737 if (op_type == ternary_op)
4739 op2 = gimple_assign_rhs3 (stmt);
4740 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4742 if (dump_enabled_p ())
4743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4744 "use not simple.\n");
4750 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4754 /* Multiple types in SLP are handled by creating the appropriate number of
4755 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4757 if (slp_node || PURE_SLP_STMT (stmt_info))
4760 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4762 gcc_assert (ncopies >= 1);
4764 /* Shifts are handled in vectorizable_shift (). */
4765 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4766 || code == RROTATE_EXPR)
4769 /* Supportable by target? */
4771 vec_mode = TYPE_MODE (vectype);
4772 if (code == MULT_HIGHPART_EXPR)
4773 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4776 optab = optab_for_tree_code (code, vectype, optab_default);
4779 if (dump_enabled_p ())
4780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4784 target_support_p = (optab_handler (optab, vec_mode)
4785 != CODE_FOR_nothing);
4788 if (!target_support_p)
4790 if (dump_enabled_p ())
4791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4792 "op not supported by target.\n");
4793 /* Check only during analysis. */
4794 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4795 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4797 if (dump_enabled_p ())
4798 dump_printf_loc (MSG_NOTE, vect_location,
4799 "proceeding using word mode.\n");
4802 /* Worthwhile without SIMD support? Check only during analysis. */
4803 if (!VECTOR_MODE_P (vec_mode)
4805 && vf < vect_min_worthwhile_factor (code))
4807 if (dump_enabled_p ())
4808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4809 "not worthwhile without SIMD support.\n");
4813 if (!vec_stmt) /* transformation not required. */
4815 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4816 if (dump_enabled_p ())
4817 dump_printf_loc (MSG_NOTE, vect_location,
4818 "=== vectorizable_operation ===\n");
4819 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4825 if (dump_enabled_p ())
4826 dump_printf_loc (MSG_NOTE, vect_location,
4827 "transform binary/unary operation.\n");
4830 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4832 /* In case the vectorization factor (VF) is bigger than the number
4833 of elements that we can fit in a vectype (nunits), we have to generate
4834 more than one vector stmt - i.e., we need to "unroll" the
4835 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4836 from one copy of the vector stmt to the next, in the field
4837 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4838 stages to find the correct vector defs to be used when vectorizing
4839 stmts that use the defs of the current stmt. The example below
4840 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4841 we need to create 4 vectorized stmts):
4843 before vectorization:
4844 RELATED_STMT VEC_STMT
4848 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4850 RELATED_STMT VEC_STMT
4851 VS1_0: vx0 = memref0 VS1_1 -
4852 VS1_1: vx1 = memref1 VS1_2 -
4853 VS1_2: vx2 = memref2 VS1_3 -
4854 VS1_3: vx3 = memref3 - -
4855 S1: x = load - VS1_0
4858 step 2: vectorize stmt S2 (done here):
4859 To vectorize stmt S2 we first need to find the relevant vector
4860 def for the first operand 'x'. This is, as usual, obtained from
4861 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4862 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4863 relevant vector def 'vx0'. Having found 'vx0' we can generate
4864 the vector stmt VS2_0, and as usual, record it in the
4865 STMT_VINFO_VEC_STMT of stmt S2.
4866 When creating the second copy (VS2_1), we obtain the relevant vector
4867 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4868 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4869 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4870 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4871 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4872 chain of stmts and pointers:
4873 RELATED_STMT VEC_STMT
4874 VS1_0: vx0 = memref0 VS1_1 -
4875 VS1_1: vx1 = memref1 VS1_2 -
4876 VS1_2: vx2 = memref2 VS1_3 -
4877 VS1_3: vx3 = memref3 - -
4878 S1: x = load - VS1_0
4879 VS2_0: vz0 = vx0 + v1 VS2_1 -
4880 VS2_1: vz1 = vx1 + v1 VS2_2 -
4881 VS2_2: vz2 = vx2 + v1 VS2_3 -
4882 VS2_3: vz3 = vx3 + v1 - -
4883 S2: z = x + 1 - VS2_0 */
4885 prev_stmt_info = NULL;
4886 for (j = 0; j < ncopies; j++)
4891 if (op_type == binary_op || op_type == ternary_op)
4892 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4895 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4897 if (op_type == ternary_op)
4899 vec_oprnds2.create (1);
4900 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4906 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4907 if (op_type == ternary_op)
4909 tree vec_oprnd = vec_oprnds2.pop ();
4910 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4915 /* Arguments are ready. Create the new vector stmt. */
4916 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4918 vop1 = ((op_type == binary_op || op_type == ternary_op)
4919 ? vec_oprnds1[i] : NULL_TREE);
4920 vop2 = ((op_type == ternary_op)
4921 ? vec_oprnds2[i] : NULL_TREE);
4922 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4923 new_temp = make_ssa_name (vec_dest, new_stmt);
4924 gimple_assign_set_lhs (new_stmt, new_temp);
4925 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4927 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4934 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4936 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4937 prev_stmt_info = vinfo_for_stmt (new_stmt);
4940 vec_oprnds0.release ();
4941 vec_oprnds1.release ();
4942 vec_oprnds2.release ();
4947 /* A helper function to ensure data reference DR's base alignment
4951 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4956 if (DR_VECT_AUX (dr)->base_misaligned)
4958 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4959 tree base_decl = DR_VECT_AUX (dr)->base_decl;
4961 if (decl_in_symtab_p (base_decl))
4962 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4965 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4966 DECL_USER_ALIGN (base_decl) = 1;
4968 DR_VECT_AUX (dr)->base_misaligned = false;
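/* Editorial illustration: for a file-scope

     int a[256];

   with the default 4-byte alignment and a V4SI vectype, this raises
   the declaration's alignment to TYPE_ALIGN (vectype), permitting
   aligned vector accesses to A.  */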
4973 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4974 reversal of the vector elements. If that is impossible to do,
4978 perm_mask_for_reverse (tree vectype)
4983 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4984 sel = XALLOCAVEC (unsigned char, nunits);
4986 for (i = 0; i < nunits; ++i)
4987 sel[i] = nunits - 1 - i;
4989 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4991 return vect_gen_perm_mask_checked (vectype, sel);
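/* Editorial example: for a 4-element vector the selector built above
   is {3, 2, 1, 0}, so the resulting mask used as

     v_rev = VEC_PERM_EXPR <v, v, {3, 2, 1, 0}>;

   reverses the elements of V, provided can_vec_perm_p accepted it.  */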
4994 /* Function vectorizable_store.
4996 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4998 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4999 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5000 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5003 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5009 tree vec_oprnd = NULL_TREE;
5010 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5011 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5013 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5014 struct loop *loop = NULL;
5015 machine_mode vec_mode;
5017 enum dr_alignment_support alignment_support_scheme;
5019 enum vect_def_type dt;
5020 stmt_vec_info prev_stmt_info = NULL;
5021 tree dataref_ptr = NULL_TREE;
5022 tree dataref_offset = NULL_TREE;
5023 gimple *ptr_incr = NULL;
5026 gimple *next_stmt, *first_stmt = NULL;
5027 bool grouped_store = false;
5028 bool store_lanes_p = false;
5029 unsigned int group_size, i;
5030 vec<tree> dr_chain = vNULL;
5031 vec<tree> oprnds = vNULL;
5032 vec<tree> result_chain = vNULL;
5034 bool negative = false;
5035 tree offset = NULL_TREE;
5036 vec<tree> vec_oprnds = vNULL;
5037 bool slp = (slp_node != NULL);
5038 unsigned int vec_num;
5039 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5040 vec_info *vinfo = stmt_info->vinfo;
5042 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5043 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5044 int scatter_scale = 1;
5045 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5046 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5049 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5052 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5055 /* Is vectorizable store? */
5057 if (!is_gimple_assign (stmt))
5060 scalar_dest = gimple_assign_lhs (stmt);
5061 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5062 && is_pattern_stmt_p (stmt_info))
5063 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5064 if (TREE_CODE (scalar_dest) != ARRAY_REF
5065 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5066 && TREE_CODE (scalar_dest) != INDIRECT_REF
5067 && TREE_CODE (scalar_dest) != COMPONENT_REF
5068 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5069 && TREE_CODE (scalar_dest) != REALPART_EXPR
5070 && TREE_CODE (scalar_dest) != MEM_REF)
5073 gcc_assert (gimple_assign_single_p (stmt));
5075 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5076 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5079 loop = LOOP_VINFO_LOOP (loop_vinfo);
5081 /* Multiple types in SLP are handled by creating the appropriate number of
5082 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5084 if (slp || PURE_SLP_STMT (stmt_info))
5087 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5089 gcc_assert (ncopies >= 1);
5091 /* FORNOW. This restriction should be relaxed. */
5092 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5094 if (dump_enabled_p ())
5095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5096 "multiple types in nested loop.\n");
5100 op = gimple_assign_rhs1 (stmt);
5101 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5103 if (dump_enabled_p ())
5104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5105 "use not simple.\n");
5109 elem_type = TREE_TYPE (vectype);
5110 vec_mode = TYPE_MODE (vectype);
5112 /* FORNOW. In some cases we can vectorize even if the data type is not
5113 supported (e.g. array initialization with 0). */
5114 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5117 if (!STMT_VINFO_DATA_REF (stmt_info))
5120 if (!STMT_VINFO_STRIDED_P (stmt_info))
5123 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5124 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5125 size_zero_node) < 0;
5126 if (negative && ncopies > 1)
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5130 "multiple types with negative step.\n");
5135 gcc_assert (!grouped_store);
5136 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5137 if (alignment_support_scheme != dr_aligned
5138 && alignment_support_scheme != dr_unaligned_supported)
5140 if (dump_enabled_p ())
5141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5142 "negative step but alignment required.\n");
5145 if (dt != vect_constant_def
5146 && dt != vect_external_def
5147 && !perm_mask_for_reverse (vectype))
5149 if (dump_enabled_p ())
5150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5151 "negative step and reversing not supported.\n");
5157 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5159 grouped_store = true;
5160 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5161 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5163 && !PURE_SLP_STMT (stmt_info)
5164 && !STMT_VINFO_STRIDED_P (stmt_info))
5166 if (vect_store_lanes_supported (vectype, group_size))
5167 store_lanes_p = true;
5168 else if (!vect_grouped_store_supported (vectype, group_size))
5172 if (STMT_VINFO_STRIDED_P (stmt_info)
5173 && (slp || PURE_SLP_STMT (stmt_info))
5174 && (group_size > nunits
5175 || nunits % group_size != 0))
5177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5178 "unhandled strided group store\n");
5182 if (first_stmt == stmt)
5184 /* STMT is the leader of the group. Check the operands of all the
5185 stmts of the group. */
5186 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5189 gcc_assert (gimple_assign_single_p (next_stmt));
5190 op = gimple_assign_rhs1 (next_stmt);
5191 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5193 if (dump_enabled_p ())
5194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5195 "use not simple.\n");
5198 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5203 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5206 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5207 &scatter_off, &scatter_scale);
5208 gcc_assert (scatter_decl);
5209 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5210 &scatter_off_vectype))
5212 if (dump_enabled_p ())
5213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5214 "scatter index use not simple.");
5219 if (!vec_stmt) /* transformation not required. */
5221 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5222 /* The SLP costs are calculated during SLP analysis. */
5223 if (!PURE_SLP_STMT (stmt_info))
5224 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5231 ensure_base_align (stmt_info, dr);
5233 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5235 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5236 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5237 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5238 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5239 edge pe = loop_preheader_edge (loop);
5242 enum { NARROW, NONE, WIDEN } modifier;
5243 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5245 if (nunits == (unsigned int) scatter_off_nunits)
5247 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5249 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5252 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5253 sel[i] = i | nunits;
5255 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5256 gcc_assert (perm_mask != NULL_TREE);
5258 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5260 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5263 for (i = 0; i < (unsigned int) nunits; ++i)
5264 sel[i] = i | scatter_off_nunits;
5266 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5267 gcc_assert (perm_mask != NULL_TREE);
5273 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5274 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5275 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5276 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5277 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5278 scaletype = TREE_VALUE (arglist);
5280 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5281 && TREE_CODE (rettype) == VOID_TYPE);
5283 ptr = fold_convert (ptrtype, scatter_base);
5284 if (!is_gimple_min_invariant (ptr))
5286 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5287 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5288 gcc_assert (!new_bb);
5291 /* Currently we support only unconditional scatter stores,
5292 so the mask should be all ones. */
5293 mask = build_int_cst (masktype, -1);
5294 mask = vect_init_vector (stmt, mask, masktype, NULL);
5296 scale = build_int_cst (scaletype, scatter_scale);
5298 prev_stmt_info = NULL;
5299 for (j = 0; j < ncopies; ++j)
5304 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5306 = vect_get_vec_def_for_operand (scatter_off, stmt);
5308 else if (modifier != NONE && (j & 1))
5310 if (modifier == WIDEN)
5313 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5314 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5317 else if (modifier == NARROW)
5319 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5322 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5330 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5332 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5335 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5337 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5338 == TYPE_VECTOR_SUBPARTS (srctype));
5339 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5340 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5341 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5342 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5346 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5348 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5349 == TYPE_VECTOR_SUBPARTS (idxtype));
5350 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5351 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5352 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5358 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5360 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5362 if (prev_stmt_info == NULL)
5363 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5365 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5366 prev_stmt_info = vinfo_for_stmt (new_stmt);
5373 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5374 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5376 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5379 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5381 /* We vectorize all the stmts of the interleaving group when we
5382 reach the last stmt in the group. */
5383 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5384 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5393 grouped_store = false;
5394 /* VEC_NUM is the number of vect stmts to be created for this
5396 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5397 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5398 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5399 op = gimple_assign_rhs1 (first_stmt);
5402 /* VEC_NUM is the number of vect stmts to be created for this
5404 vec_num = group_size;
5410 group_size = vec_num = 1;
5413 if (dump_enabled_p ())
5414 dump_printf_loc (MSG_NOTE, vect_location,
5415 "transform store. ncopies = %d\n", ncopies);
5417 if (STMT_VINFO_STRIDED_P (stmt_info))
5419 gimple_stmt_iterator incr_gsi;
5425 gimple_seq stmts = NULL;
5426 tree stride_base, stride_step, alias_off;
5430 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5433 = fold_build_pointer_plus
5434 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5435 size_binop (PLUS_EXPR,
5436 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5437 convert_to_ptrofftype (DR_INIT (first_dr))));
5438 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5440 /* For a store with loop-invariant (but other than power-of-2)
5441 stride (i.e. not a grouped access) like so:
5443 for (i = 0; i < n; i += stride)
5446 we generate a new induction variable and new stores from
5447 the components of the (vectorized) rhs:
5449 for (j = 0; ; j += VF*stride)
5454 array[j + stride] = tmp2;
5458 unsigned nstores = nunits;
5459 tree ltype = elem_type;
5462 nstores = nunits / group_size;
5463 if (group_size < nunits)
5464 ltype = build_vector_type (elem_type, group_size);
5467 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5468 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5472 ivstep = stride_step;
5473 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5474 build_int_cst (TREE_TYPE (ivstep),
5475 ncopies * nstores));
5477 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5479 create_iv (stride_base, ivstep, NULL,
5480 loop, &incr_gsi, insert_after,
5482 incr = gsi_stmt (incr_gsi);
5483 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5485 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5487 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5489 prev_stmt_info = NULL;
5490 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5491 next_stmt = first_stmt;
5492 for (g = 0; g < group_size; g++)
5494 running_off = offvar;
5497 tree size = TYPE_SIZE_UNIT (ltype);
5498 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5500 tree newoff = copy_ssa_name (running_off, NULL);
5501 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5503 vect_finish_stmt_generation (stmt, incr, gsi);
5504 running_off = newoff;
5506 for (j = 0; j < ncopies; j++)
5508 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5509 and first_stmt == stmt. */
5514 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5516 vec_oprnd = vec_oprnds[0];
5520 gcc_assert (gimple_assign_single_p (next_stmt));
5521 op = gimple_assign_rhs1 (next_stmt);
5522 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5528 vec_oprnd = vec_oprnds[j];
5531 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5532 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5536 for (i = 0; i < nstores; i++)
5538 tree newref, newoff;
5539 gimple *incr, *assign;
5540 tree size = TYPE_SIZE (ltype);
5541 /* Extract the i'th component. */
5542 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5543 bitsize_int (i), size);
5544 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5547 elem = force_gimple_operand_gsi (gsi, elem, true,
5551 newref = build2 (MEM_REF, ltype,
5552 running_off, alias_off);
5554 /* And store it to *running_off. */
5555 assign = gimple_build_assign (newref, elem);
5556 vect_finish_stmt_generation (stmt, assign, gsi);
5558 newoff = copy_ssa_name (running_off, NULL);
5559 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5560 running_off, stride_step);
5561 vect_finish_stmt_generation (stmt, incr, gsi);
5563 running_off = newoff;
5564 if (g == group_size - 1
5567 if (j == 0 && i == 0)
5568 STMT_VINFO_VEC_STMT (stmt_info)
5569 = *vec_stmt = assign;
5571 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5572 prev_stmt_info = vinfo_for_stmt (assign);
5576 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5581 dr_chain.create (group_size);
5582 oprnds.create (group_size);
5584 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5585 gcc_assert (alignment_support_scheme);
5586 /* Targets with store-lane instructions must not require explicit
5588 gcc_assert (!store_lanes_p
5589 || alignment_support_scheme == dr_aligned
5590 || alignment_support_scheme == dr_unaligned_supported);
5593 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5596 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5598 aggr_type = vectype;
5600 /* In case the vectorization factor (VF) is bigger than the number
5601 of elements that we can fit in a vectype (nunits), we have to generate
5602 more than one vector stmt - i.e., we need to "unroll" the
5603 vector stmt by a factor VF/nunits. For more details see documentation in
5604 vect_get_vec_def_for_copy_stmt. */
5606 /* In case of interleaving (non-unit grouped access):
5613 We create vectorized stores starting from the base address (the access of
5614 the first stmt in the chain, S2 in the above example) when the last store
5615 stmt of the chain (S4) is reached:
5618 VS2: &base + vec_size*1 = vx0
5619 VS3: &base + vec_size*2 = vx1
5620 VS4: &base + vec_size*3 = vx3
5622 Then permutation statements are generated:
5624 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5625 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5628 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5629 (the order of the data-refs in the output of vect_permute_store_chain
5630 corresponds to the order of scalar stmts in the interleaving chain - see
5631 the documentation of vect_permute_store_chain()).
5633 In case of both multiple types and interleaving, above vector stores and
5634 permutation stmts are created for every copy. The result vector stmts are
5635 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5636 STMT_VINFO_RELATED_STMT for the next copies.
5639 prev_stmt_info = NULL;
5640 for (j = 0; j < ncopies; j++)
5647 /* Get vectorized arguments for SLP_NODE. */
5648 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5649 NULL, slp_node, -1);
5651 vec_oprnd = vec_oprnds[0];
5655 /* For interleaved stores we collect vectorized defs for all the
5656 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5657 used as an input to vect_permute_store_chain(), and OPRNDS as
5658 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5660 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5661 OPRNDS are of size 1. */
5662 next_stmt = first_stmt;
5663 for (i = 0; i < group_size; i++)
5665 /* Since gaps are not supported for interleaved stores,
5666 GROUP_SIZE is the exact number of stmts in the chain.
5667 Therefore, NEXT_STMT can't be NULL. If there is no
5668 interleaving, GROUP_SIZE is 1, and only one iteration
5669 of the loop will be executed. */
5670 gcc_assert (next_stmt
5671 && gimple_assign_single_p (next_stmt));
5672 op = gimple_assign_rhs1 (next_stmt);
5674 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5675 dr_chain.quick_push (vec_oprnd);
5676 oprnds.quick_push (vec_oprnd);
5677 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5681 /* We should have caught mismatched types earlier. */
5682 gcc_assert (useless_type_conversion_p (vectype,
5683 TREE_TYPE (vec_oprnd)));
5684 bool simd_lane_access_p
5685 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5686 if (simd_lane_access_p
5687 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5688 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5689 && integer_zerop (DR_OFFSET (first_dr))
5690 && integer_zerop (DR_INIT (first_dr))
5691 && alias_sets_conflict_p (get_alias_set (aggr_type),
5692 get_alias_set (DR_REF (first_dr))))
5694 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5695 dataref_offset = build_int_cst (reference_alias_ptr_type
5696 (DR_REF (first_dr)), 0);
5701 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5702 simd_lane_access_p ? loop : NULL,
5703 offset, &dummy, gsi, &ptr_incr,
5704 simd_lane_access_p, &inv_p);
5705 gcc_assert (bb_vinfo || !inv_p);
5709 /* For interleaved stores we created vectorized defs for all the
5710 defs stored in OPRNDS in the previous iteration (previous copy).
5711 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5712 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5714 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5715 OPRNDS are of size 1. */
5716 for (i = 0; i < group_size; i++)
5719 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5720 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5721 dr_chain[i] = vec_oprnd;
5722 oprnds[i] = vec_oprnd;
5726 = int_const_binop (PLUS_EXPR, dataref_offset,
5727 TYPE_SIZE_UNIT (aggr_type));
5729 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5730 TYPE_SIZE_UNIT (aggr_type));
5737 /* Combine all the vectors into an array. */
5738 vec_array = create_vector_array (vectype, vec_num);
5739 for (i = 0; i < vec_num; i++)
5741 vec_oprnd = dr_chain[i];
5742 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5746 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5747 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5748 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5749 gimple_call_set_lhs (new_stmt, data_ref);
5750 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5758 result_chain.create (group_size);
5760 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5764 next_stmt = first_stmt;
5765 for (i = 0; i < vec_num; i++)
5767 unsigned align, misalign;
5770 /* Bump the vector pointer. */
5771 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5775 vec_oprnd = vec_oprnds[i];
5776 else if (grouped_store)
5777 /* For grouped stores vectorized defs are interleaved in
5778 vect_permute_store_chain(). */
5779 vec_oprnd = result_chain[i];
5781 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5785 : build_int_cst (reference_alias_ptr_type
5786 (DR_REF (first_dr)), 0));
5787 align = TYPE_ALIGN_UNIT (vectype);
5788 if (aligned_access_p (first_dr))
5790 else if (DR_MISALIGNMENT (first_dr) == -1)
5792 if (DR_VECT_AUX (first_dr)->base_element_aligned)
5793 align = TYPE_ALIGN_UNIT (elem_type);
5795 align = get_object_alignment (DR_REF (first_dr))
5798 TREE_TYPE (data_ref)
5799 = build_aligned_type (TREE_TYPE (data_ref),
5800 align * BITS_PER_UNIT);
5804 TREE_TYPE (data_ref)
5805 = build_aligned_type (TREE_TYPE (data_ref),
5806 TYPE_ALIGN (elem_type));
5807 misalign = DR_MISALIGNMENT (first_dr);
5809 if (dataref_offset == NULL_TREE
5810 && TREE_CODE (dataref_ptr) == SSA_NAME)
5811 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5815 && dt != vect_constant_def
5816 && dt != vect_external_def)
5818 tree perm_mask = perm_mask_for_reverse (vectype);
5820 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5822 tree new_temp = make_ssa_name (perm_dest);
5824 /* Generate the permute statement. */
5826 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5827 vec_oprnd, perm_mask);
5828 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5830 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5831 vec_oprnd = new_temp;
5834 /* Arguments are ready. Create the new vector stmt. */
5835 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5836 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5841 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5849 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5851 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5852 prev_stmt_info = vinfo_for_stmt (new_stmt);
5856 dr_chain.release ();
5858 result_chain.release ();
5859 vec_oprnds.release ();
5864 /* Given a vector type VECTYPE, turn the permutation SEL into the equivalent
5865 VECTOR_CST mask. No checks are made that the target platform supports the
5866 mask, so callers may wish to test can_vec_perm_p separately, or use
5867 vect_gen_perm_mask_checked. */
5870 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5872 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5875 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5877 mask_elt_type = lang_hooks.types.type_for_mode
5878 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5879 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5881 mask_elts = XALLOCAVEC (tree, nunits);
5882 for (i = nunits - 1; i >= 0; i--)
5883 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5884 mask_vec = build_vector (mask_type, mask_elts);
5889 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5890 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5893 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5895 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5896 return vect_gen_perm_mask_any (vectype, sel);
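/* Editorial usage sketch: a caller that wants to test the mask itself
   rather than assert it would write

     if (can_vec_perm_p (TYPE_MODE (vectype), false, sel))
       mask = vect_gen_perm_mask_any (vectype, sel);

   which is equivalent to calling the checked variant when the
   predicate is known to hold.  */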
5899 /* Given vector variables X and Y that were generated for the scalar
5900 STMT, generate instructions to permute the vector elements of X and Y
5901 using permutation mask MASK_VEC, insert them at *GSI and return the
5902 permuted vector variable. */
5905 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
5906 gimple_stmt_iterator *gsi)
5908 tree vectype = TREE_TYPE (x);
5909 tree perm_dest, data_ref;
5912 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5913 data_ref = make_ssa_name (perm_dest);
5915 /* Generate the permute statement. */
5916 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5917 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
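/* Editorial note: schematically, for MASK_VEC {0, 8, 1, 9, ...} this
   emits

     perm_dest_N = VEC_PERM_EXPR <x, y, {0, 8, 1, 9, ...}>;

   at *GSI and returns the SSA name PERM_DEST_N.  */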
5922 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5923 inserting them on the loop's preheader edge. Return true if we
5924 were successful in doing so (and thus STMT can then be moved),
5925 otherwise return false. */
5928 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
5934 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5936 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
5937 if (!gimple_nop_p (def_stmt)
5938 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5940 /* Make sure we don't need to recurse. While we could do
5941 so in simple cases, for more complex use webs we don't
5942 have an easy way to preserve stmt order to fulfil
5943 dependencies within them. */
5946 if (gimple_code (def_stmt) == GIMPLE_PHI)
5948 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5950 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
5951 if (!gimple_nop_p (def_stmt2)
5952 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5962 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5964 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
5965 if (!gimple_nop_p (def_stmt)
5966 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5968 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5969 gsi_remove (&gsi, false);
5970 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
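/* Editorial illustration: given

     a_1 = *p_2;        (p_2 defined before LOOP)
     x_3 = a_1 + 1;     (STMT, all uses now hoistable)

   moving a_1's definition to the preheader edge makes it legal to
   hoist STMT out of LOOP as well, which is how invariant loads can
   be moved out of the vectorized loop.  */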
5977 /* vectorizable_load.
5979 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5981 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5982 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5983 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5986 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5987 slp_tree slp_node, slp_instance slp_node_instance)
5990 tree vec_dest = NULL;
5991 tree data_ref = NULL;
5992 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5993 stmt_vec_info prev_stmt_info;
5994 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5995 struct loop *loop = NULL;
5996 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5997 bool nested_in_vect_loop = false;
5998 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6002 gimple *new_stmt = NULL;
6004 enum dr_alignment_support alignment_support_scheme;
6005 tree dataref_ptr = NULL_TREE;
6006 tree dataref_offset = NULL_TREE;
6007 gimple *ptr_incr = NULL;
6009 int i, j, group_size = -1, group_gap_adj;
6010 tree msq = NULL_TREE, lsq;
6011 tree offset = NULL_TREE;
6012 tree byte_offset = NULL_TREE;
6013 tree realignment_token = NULL_TREE;
6015 vec<tree> dr_chain = vNULL;
6016 bool grouped_load = false;
6017 bool load_lanes_p = false;
6020 bool negative = false;
6021 bool compute_in_loop = false;
6022 struct loop *at_loop;
6024 bool slp = (slp_node != NULL);
6025 bool slp_perm = false;
6026 enum tree_code code;
6027 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6030 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6031 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6032 int gather_scale = 1;
6033 enum vect_def_type gather_dt = vect_unknown_def_type;
6034 vec_info *vinfo = stmt_info->vinfo;
6036 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6039 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
6042 /* Is vectorizable load? */
6043 if (!is_gimple_assign (stmt))
6046 scalar_dest = gimple_assign_lhs (stmt);
6047 if (TREE_CODE (scalar_dest) != SSA_NAME)
6050 code = gimple_assign_rhs_code (stmt);
6051 if (code != ARRAY_REF
6052 && code != BIT_FIELD_REF
6053 && code != INDIRECT_REF
6054 && code != COMPONENT_REF
6055 && code != IMAGPART_EXPR
6056 && code != REALPART_EXPR
6058 && TREE_CODE_CLASS (code) != tcc_declaration)
6061 if (!STMT_VINFO_DATA_REF (stmt_info))
6064 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6065 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6069 loop = LOOP_VINFO_LOOP (loop_vinfo);
6070 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6071 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6076 /* Multiple types in SLP are handled by creating the appropriate number of
6077 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6079 if (slp || PURE_SLP_STMT (stmt_info))
6082 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6084 gcc_assert (ncopies >= 1);
6086 /* FORNOW. This restriction should be relaxed. */
6087 if (nested_in_vect_loop && ncopies > 1)
6089 if (dump_enabled_p ())
6090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6091 "multiple types in nested loop.\n");
6095 /* Invalidate assumptions made by dependence analysis when vectorization
6096 on the unrolled body effectively re-orders stmts. */
6098 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6099 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6100 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6102 if (dump_enabled_p ())
6103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6104 "cannot perform implicit CSE when unrolling "
6105 "with negative dependence distance\n");
6109 elem_type = TREE_TYPE (vectype);
6110 mode = TYPE_MODE (vectype);
6112 /* FORNOW. In some cases we can vectorize even if the data type is not
6113 supported (e.g. data copies). */
6114 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6116 if (dump_enabled_p ())
6117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6118 "Aligned load, but unsupported type.\n");
6122 /* Check if the load is a part of an interleaving chain. */
6123 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6125 grouped_load = true;
6127 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6129 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6131 /* If this is single-element interleaving with an element distance
6132 that leaves unused vector loads around, punt: we at least create
6133 very sub-optimal code in that case (and blow up memory,
6135 if (first_stmt == stmt
6136 && !GROUP_NEXT_ELEMENT (stmt_info)
6137 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6139 if (dump_enabled_p ())
6140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6141 "single-element interleaving not supported "
6142 "for not adjacent vector loads\n");
6146 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6149 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6151 && !PURE_SLP_STMT (stmt_info)
6152 && !STMT_VINFO_STRIDED_P (stmt_info))
6154 if (vect_load_lanes_supported (vectype, group_size))
6155 load_lanes_p = true;
6156 else if (!vect_grouped_load_supported (vectype, group_size))
6160 /* Invalidate assumptions made by dependence analysis when vectorization
6161 on the unrolled body effectively re-orders stmts. */
6162 if (!PURE_SLP_STMT (stmt_info)
6163 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6164 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6165 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 "cannot perform implicit CSE when performing "
6170 "group loads with negative dependence distance\n");
6174 /* Similarly, when the stmt is a load that is both part of an SLP
6175 instance and a loop-vectorized stmt via the same-dr mechanism,
6176 we have to give up. */
6177 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6178 && (STMT_SLP_TYPE (stmt_info)
6179 != STMT_SLP_TYPE (vinfo_for_stmt
6180 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6182 if (dump_enabled_p ())
6183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6184 "conflicting SLP types for CSEd load\n");
6190 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6193 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6194 &gather_off, &gather_scale);
6195 gcc_assert (gather_decl);
6196 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6197 &gather_off_vectype))
6199 if (dump_enabled_p ())
6200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6201 "gather index use not simple.\n");
6205 else if (STMT_VINFO_STRIDED_P (stmt_info))
6208 && (slp || PURE_SLP_STMT (stmt_info)))
6209 && (group_size > nunits
6210 || nunits % group_size != 0))
6212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6213 "unhandled strided group load\n");
6219 negative = tree_int_cst_compare (nested_in_vect_loop
6220 ? STMT_VINFO_DR_STEP (stmt_info)
6222 size_zero_node) < 0;
6223 if (negative && ncopies > 1)
6225 if (dump_enabled_p ())
6226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6227 "multiple types with negative step.\n");
6235 if (dump_enabled_p ())
6236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6237 "negative step for group load not supported"
6241 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6242 if (alignment_support_scheme != dr_aligned
6243 && alignment_support_scheme != dr_unaligned_supported)
6245 if (dump_enabled_p ())
6246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6247 "negative step but alignment required.\n");
6250 if (!perm_mask_for_reverse (vectype))
6252 if (dump_enabled_p ())
6253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6254 "negative step and reversing not supported."
6261 if (!vec_stmt) /* transformation not required. */
6263 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6264 /* The SLP costs are calculated during SLP analysis. */
6265 if (!PURE_SLP_STMT (stmt_info))
6266 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6271 if (dump_enabled_p ())
6272 dump_printf_loc (MSG_NOTE, vect_location,
6273 "transform load. ncopies = %d\n", ncopies);
6277 ensure_base_align (stmt_info, dr);
6279 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6281 tree vec_oprnd0 = NULL_TREE, op;
6282 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6283 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6284 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6285 edge pe = loop_preheader_edge (loop);
6288 enum { NARROW, NONE, WIDEN } modifier;
6289 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6291 if (nunits == gather_off_nunits)
6293 else if (nunits == gather_off_nunits / 2)
6295 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6298 for (i = 0; i < gather_off_nunits; ++i)
6299 sel[i] = i | nunits;
6301 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6303 else if (nunits == gather_off_nunits * 2)
6305 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6308 for (i = 0; i < nunits; ++i)
6309 sel[i] = i < gather_off_nunits
6310 ? i : i + nunits - gather_off_nunits;
6312 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6318 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6319 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6320 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6321 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6322 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6323 scaletype = TREE_VALUE (arglist);
6324 gcc_checking_assert (types_compatible_p (srctype, rettype));
6326 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6328 ptr = fold_convert (ptrtype, gather_base);
6329 if (!is_gimple_min_invariant (ptr))
6331 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6332 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6333 gcc_assert (!new_bb);
6336 /* Currently we support only unconditional gather loads,
6337 so the mask should be all ones. */
6338 if (TREE_CODE (masktype) == INTEGER_TYPE)
6339 mask = build_int_cst (masktype, -1);
6340 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6342 mask = build_int_cst (TREE_TYPE (masktype), -1);
6343 mask = build_vector_from_val (masktype, mask);
6344 mask = vect_init_vector (stmt, mask, masktype, NULL);
6346 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6350 for (j = 0; j < 6; ++j)
6352 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6353 mask = build_real (TREE_TYPE (masktype), r);
6354 mask = build_vector_from_val (masktype, mask);
6355 mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gather_scale);

      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
	merge = build_int_cst (TREE_TYPE (rettype), 0);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
	{
	  REAL_VALUE_TYPE r;
	  long tmp[6];
	  for (j = 0; j < 6; ++j)
	    tmp[j] = 0;
	  real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
	  merge = build_real (TREE_TYPE (rettype), r);
	}
      else
	gcc_unreachable ();
      merge = build_vector_from_val (rettype, merge);
      merge = vect_init_vector (stmt, merge, rettype, NULL);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
	{
	  if (modifier == WIDEN && (j & 1))
	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				       perm_mask, stmt, gsi);
	  else if (j == 0)
	    op = vec_oprnd0
	      = vect_get_vec_def_for_operand (gather_off, stmt);
	  else
	    op = vec_oprnd0
	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
			  == TYPE_VECTOR_SUBPARTS (idxtype));
	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	      new_stmt
		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      op = var;
	    }

	  new_stmt
	    = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);

	  if (!useless_type_conversion_p (vectype, rettype))
	    {
	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
			  == TYPE_VECTOR_SUBPARTS (rettype));
	      op = vect_get_new_ssa_name (rettype, vect_simple_var);
	      gimple_call_set_lhs (new_stmt, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      var = make_ssa_name (vec_dest);
	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	      new_stmt
		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	    }
	  else
	    {
	      var = make_ssa_name (vec_dest, new_stmt);
	      gimple_call_set_lhs (new_stmt, var);
	    }

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (modifier == NARROW)
	    {
	      if ((j & 1) == 0)
		{
		  prev_res = var;
		  continue;
		}
	      var = permute_vec_elements (prev_res, var,
					  perm_mask, stmt, gsi);
	      new_stmt = SSA_NAME_DEF_STMT (var);
	    }

	  if (prev_stmt_info == NULL)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      return true;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      if (slp && grouped_load)
	first_dr = STMT_VINFO_DATA_REF
	    (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));

      stride_base
	= fold_build_pointer_plus
	    (DR_BASE_ADDRESS (first_dr),
	     size_binop (PLUS_EXPR,
			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
			 convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	     ...  */

      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
			    build_int_cst (TREE_TYPE (stride_step), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
		 loop, &incr_gsi, insert_after,
		 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      stride_step = force_gimple_operand (unshare_expr (stride_step),
					  &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
      int nloads = nunits;
      tree ltype = TREE_TYPE (vectype);
      auto_vec<tree> dr_chain;
      if (slp)
	{
	  nloads = nunits / group_size;
	  if (group_size < nunits)
	    ltype = build_vector_type (TREE_TYPE (vectype), group_size);
	  else
	    ltype = vectype;
	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  if (slp_perm)
	    dr_chain.create (ncopies);
	}
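
      /* E.g. for an SLP subchain with GROUP_SIZE == 2 and a four-element
	 vectype, NLOADS == 2 and LTYPE is a two-element vector, so each
	 vector statement below is assembled from two strided two-element
	 loads via a CONSTRUCTOR; with GROUP_SIZE == NUNITS a single
	 full-width load per copy suffices (NLOADS == 1).  */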
      for (j = 0; j < ncopies; j++)
	{
	  if (nloads > 1)
	    {
	      vec_alloc (v, nloads);
	      for (i = 0; i < nloads; i++)
		{
		  tree newref, newoff;

		  newref = build2 (MEM_REF, ltype, running_off, alias_off);

		  newref = force_gimple_operand_gsi (gsi, newref, true,
						     NULL_TREE, true,
						     GSI_SAME_STMT);
		  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
		  newoff = copy_ssa_name (running_off);
		  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
					      running_off, stride_step);
		  vect_finish_stmt_generation (stmt, incr, gsi);

		  running_off = newoff;
		}

	      tree vec_inv = build_constructor (vectype, v);
	      new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
	    }
	  else
	    {
	      new_stmt = gimple_build_assign (make_ssa_name (ltype),
					      build2 (MEM_REF, ltype,
						      running_off, alias_off));
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      tree newoff = copy_ssa_name (running_off);
	      gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
						  running_off, stride_step);
	      vect_finish_stmt_generation (stmt, incr, gsi);

	      running_off = newoff;
	    }

	  if (slp)
	    {
	      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      if (slp_perm)
		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      if (slp_perm)
	vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
				      slp_node_instance, false);
      return true;
    }

  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
	  && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
	  && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
	first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ???  But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ???  With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
	  && !slp)
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  grouped_load = false;
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    vec_num = (group_size * vf + nunits - 1) / nunits;
	  else
	    vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  group_gap_adj = vf * group_size - nunits * vec_num;
	}
      else
	vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap_adj = 0;
    }
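
  /* E.g. an SLP subchain of two loads from a GROUP_SIZE == 4 group with
     VF == 2 and a four-element vectype needs a single vector stmt, giving
     GROUP_GAP_ADJ == 2*4 - 4*1 == 4; the pointer bump emitted at the end
     of the transform loop below then skips those four excess elements.  */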

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

       S1:  x2 = &base + 2
       S2:  x0 = &base
       S3:  x1 = &base + 1
       S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx2 = &base + vec_size*2
     VS4: vx3 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
	 p2 = p2 + indx * vectype_size
	 lsq = *(floor(p2))
	 vec_dest = realign_load (msq, lsq, realignment_token)
	 indx = indx + 1;
	 msq = lsq;
       }   */
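
  /* floor(p) above denotes rounding P down to a vector-size boundary; it is
     materialized below as P & -TYPE_ALIGN_UNIT (vectype), e.g. p & -16 for
     16-byte vectors, so both loads feeding the realignment are aligned.  */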

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
				    size_one_node);
	}
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (DR_REF (first_dr)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (reference_alias_ptr_type
					      (DR_REF (first_dr)), 0);
	      inv_p = false;
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p,
					  byte_offset);
	}
      else if (dataref_offset)
	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					  TYPE_SIZE_UNIT (aggr_type));
      else
	dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
				       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);

      if (load_lanes_p)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  /* Emit:
	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	  gimple_call_set_lhs (new_stmt, vec_array);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (stmt, dr_chain);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, NULL_TREE);

	      /* 2. Create the vector-load in the loop.  */
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    unsigned int align, misalign;

		    data_ref
		      = fold_build2 (MEM_REF, vectype, dataref_ptr,
				     dataref_offset
				     ? dataref_offset
				     : build_int_cst (reference_alias_ptr_type
						      (DR_REF (first_dr)), 0));
		    align = TYPE_ALIGN_UNIT (vectype);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr));
			misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr) == -1)
		      {
			if (DR_VECT_AUX (first_dr)->base_element_aligned)
			  align = TYPE_ALIGN_UNIT (elem_type);
			else
			  align = (get_object_alignment (DR_REF (first_dr))
				   / BITS_PER_UNIT);
			misalign = 0;
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						align * BITS_PER_UNIT);
		      }
		    else
		      {
			TREE_TYPE (data_ref)
			  = build_aligned_type (TREE_TYPE (data_ref),
						TYPE_ALIGN (elem_type));
			misalign = DR_MISALIGNMENT (first_dr);
		      }
		    if (dataref_offset == NULL_TREE
			&& TREE_CODE (dataref_ptr) == SSA_NAME)
		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
					      align, misalign);
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;

		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
		      ptr = copy_ssa_name (dataref_ptr);
		    else
		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
		    new_stmt = gimple_build_assign
				 (ptr, BIT_AND_EXPR, dataref_ptr,
				  build_int_cst
				  (TREE_TYPE (dataref_ptr),
				   -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype)));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs,
				       TYPE_SIZE_UNIT (elem_type));
		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		    new_stmt = gimple_build_assign
				 (NULL_TREE, BIT_AND_EXPR, ptr,
				  build_int_cst
				  (TREE_TYPE (ptr),
				   -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype)));
		    ptr = copy_ssa_name (ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (reference_alias_ptr_type
					       (DR_REF (first_dr)), 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  if (TREE_CODE (dataref_ptr) == SSA_NAME)
		    new_temp = copy_ssa_name (dataref_ptr);
		  else
		    new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
		  new_stmt = gimple_build_assign
			       (new_temp, BIT_AND_EXPR, dataref_ptr,
				build_int_cst
				(TREE_TYPE (dataref_ptr),
				 -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype)));
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  data_ref
		    = build2 (MEM_REF, vectype, new_temp,
			      build_int_cst (reference_alias_ptr_type
					     (DR_REF (first_dr)), 0));
		  break;
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      new_stmt = gimple_build_assign (vec_dest, data_ref);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
						  msq, lsq, realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  if (alignment_support_scheme == dr_explicit_realign_optimized)
		    {
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}

	      /* 4. Handle invariant-load.  */
	      if (inv_p && !bb_vinfo)
		{
		  gcc_assert (!grouped_load);
		  /* If we have versioned for aliasing or the loop doesn't
		     have any data dependencies that would preclude this,
		     then we are sure this is a loop invariant load and
		     thus we can insert it on the preheader edge.  */
		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt, loop))
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_NOTE, vect_location,
					   "hoisting out of the vectorized "
					   "loop: ");
			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
			}
		      tree tem = copy_ssa_name (scalar_dest);
		      gsi_insert_on_edge_immediate
			(loop_preheader_edge (loop),
			 gimple_build_assign (tem,
					      unshare_expr
					      (gimple_assign_rhs1 (stmt))));
		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
		    }
		  else
		    {
		      gimple_stmt_iterator gsi2 = *gsi;
		      gsi_next (&gsi2);
		      new_temp = vect_init_vector (stmt, scalar_dest,
						   vectype, &gsi2);
		    }
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, vinfo));
		}

	      if (negative)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  new_temp = permute_vec_elements (new_temp, new_temp,
						   perm_mask, stmt, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}

	      /* Collect vector loads and later create their permutation in
		 vect_transform_grouped_load ().  */
	      if (grouped_load || slp_perm)
		dr_chain.quick_push (new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	    }
	  /* Bump the vector pointer to account for a gap or for excess
	     elements loaded for a permuted SLP load.  */
	  if (group_gap_adj != 0)
	    {
	      bool ovf;
	      tree bump
		= wide_int_to_tree (sizetype,
				    wi::smul (TYPE_SIZE_UNIT (elem_type),
					      group_gap_adj, &ovf));
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt, bump);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      dr_chain.release ();
	      return false;
	    }
	}
      else
	{
	  if (grouped_load)
	    {
	      if (!load_lanes_p)
		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
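
/* For example, for the COND_EXPR "a_1 < b_2" in "x = a_1 < b_2 ? c : d",
   both operands are SSA_NAMEs whose definitions must pass
   vect_is_simple_use, and *COMP_VECTYPE is taken from whichever operand
   has a vector type recorded for its definition.  */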

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}

/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, tree reduc_def, int reduc_index,
			slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt, dts[4];
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
	return false;

      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	       && reduc_def))
	return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "value used after loop.\n");
	  return false;
	}
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
      || !comp_vectype)
    return false;

  gimple *def_stmt;
  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt))
    return false;

  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      ops.safe_push (TREE_OPERAND (cond_expr, 0));
	      ops.safe_push (TREE_OPERAND (cond_expr, 1));
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();

	      ops.release ();
	      vec_defs.release ();
	    }
	  else
	    {
	      gimple *gtemp;
	      vec_cond_lhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
				  loop_vinfo, &gtemp, &dts[0]);

	      vec_cond_rhs =
		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
					      stmt);
	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
				  loop_vinfo, &gtemp, &dts[1]);
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
								  stmt);
		  vect_is_simple_use (then_clause, loop_vinfo,
				      &gtemp, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
								  stmt);
		  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
							 vec_oprnds0.pop ());
	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
							 vec_oprnds1.pop ());
	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
	{
	  vec_cond_rhs = vec_oprnds1[i];
	  vec_then_clause = vec_oprnds2[i];
	  vec_else_clause = vec_oprnds3[i];

	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
				vec_cond_lhs, vec_cond_rhs);
	  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
				  vec_compare, vec_then_clause, vec_else_clause);

	  new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}

/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple *pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt
	  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt = pattern_stmt;
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "==> examining pattern statement: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return true;
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt
	   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
	       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "==> examining pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
	return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  gimple *pattern_def_stmt = gsi_stmt (si);
	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_NOTE, vect_location,
				   "==> examining pattern def statement: ");
		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
		}

	      if (!vect_analyze_stmt (pattern_def_stmt,
				      need_to_vectorize, node))
		return false;
	    }
	}
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type:  ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (is_gimple_call (stmt)
		      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	  || vectorizable_conversion (stmt, NULL, NULL, node)
	  || vectorizable_shift (stmt, NULL, NULL, node)
	  || vectorizable_operation (stmt, NULL, NULL, node)
	  || vectorizable_assignment (stmt, NULL, NULL, node)
	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_reduction (stmt, NULL, NULL, node)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
  else if (bb_vinfo)
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
	  || vectorizable_conversion (stmt, NULL, NULL, node)
	  || vectorizable_shift (stmt, NULL, NULL, node)
	  || vectorizable_operation (stmt, NULL, NULL, node)
	  || vectorizable_assignment (stmt, NULL, NULL, node)
	  || vectorizable_load (stmt, NULL, NULL, node, NULL)
	  || vectorizable_call (stmt, NULL, NULL, node)
	  || vectorizable_store (stmt, NULL, NULL, node)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: relevant stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: live stmt not ");
	  dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}

      return false;
    }

  return true;
}

/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
		     bool *grouped_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
	     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}

/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}

/* Create a growable vector holding the stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector holding the stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}

/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}

/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of the same size
   as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  basic_block bb = gimple_bb (*def_stmt);
  if ((is_a <loop_vec_info> (vinfo)
       && !flow_bb_inside_loop_p (as_a <loop_vec_info> (vinfo)->loop, bb))
      || (is_a <bb_vec_info> (vinfo)
	  && (bb != as_a <bb_vec_info> (vinfo)->bb
	      || gimple_code (*def_stmt) == GIMPLE_PHI)))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
	*dt = vect_external_def;
      else
	*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   use stmt.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
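
/* E.g. widening a vector of chars to ints takes two steps: the first
   unpacks each char vector into two short vectors (the intermediate type
   recorded in INTERM_TYPES), and the second unpacks each of those into two
   int vectors, so MULTI_STEP_CVT == 1 and four int vectors result per
   input vector.  */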
8285 supportable_widening_operation (enum tree_code code, gimple *stmt,
8286 tree vectype_out, tree vectype_in,
8287 enum tree_code *code1, enum tree_code *code2,
8288 int *multi_step_cvt,
8289 vec<tree> *interm_types)
8291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8292 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8293 struct loop *vect_loop = NULL;
8294 machine_mode vec_mode;
8295 enum insn_code icode1, icode2;
8296 optab optab1, optab2;
8297 tree vectype = vectype_in;
8298 tree wide_vectype = vectype_out;
8299 enum tree_code c1, c2;
8301 tree prev_type, intermediate_type;
8302 machine_mode intermediate_mode, prev_mode;
8303 optab optab3, optab4;
8305 *multi_step_cvt = 0;
8307 vect_loop = LOOP_VINFO_LOOP (loop_info);
8311 case WIDEN_MULT_EXPR:
8312 /* The result of a vectorized widening operation usually requires
8313 two vectors (because the widened results do not fit into one vector).
8314 The generated vector results would normally be expected to be
8315 generated in the same order as in the original scalar computation,
8316 i.e. if 8 results are generated in each vector iteration, they are
8317 to be organized as follows:
8318 vect1: [res1,res2,res3,res4],
8319 vect2: [res5,res6,res7,res8].
8321 However, in the special case that the result of the widening
8322 operation is used in a reduction computation only, the order doesn't
8323 matter (because when vectorizing a reduction we change the order of
8324 the computation). Some targets can take advantage of this and
8325 generate more efficient code. For example, targets like Altivec,
8326 that support widen_mult using a sequence of {mult_even,mult_odd}
8327 generate the following vectors:
8328 vect1: [res1,res3,res5,res7],
8329 vect2: [res2,res4,res6,res8].
8331 When vectorizing outer-loops, we execute the inner-loop sequentially
8332 (each vectorized inner-loop iteration contributes to VF outer-loop
8333 iterations in parallel). We therefore don't allow to change the
8334 order of the computation in the inner-loop during outer-loop
8336 /* TODO: Another case in which order doesn't *really* matter is when we
8337 widen and then contract again, e.g. (short)((int)x * y >> 8).
8338 Normally, pack_trunc performs an even/odd permute, whereas the
8339 repack from an even/odd expansion would be an interleave, which
8340 would be significantly simpler for e.g. AVX2. */
8341 /* In any case, in order to avoid duplicating the code below, recurse
8342 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8343 are properly set up for the caller. If we fail, we'll continue with
8344 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8346 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8347 && !nested_in_vect_loop_p (vect_loop, stmt)
8348 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8349 stmt, vectype_out, vectype_in,
8350 code1, code2, multi_step_cvt,
8353 /* Elements in a vector with vect_used_by_reduction property cannot
8354 be reordered if the use chain with this property does not have the
8355 same operation. One such an example is s += a * b, where elements
8356 in a and b cannot be reordered. Here we check if the vector defined
8357 by STMT is only directly used in the reduction statement. */
8358 tree lhs = gimple_assign_lhs (stmt);
8359 use_operand_p dummy;
8361 stmt_vec_info use_stmt_info = NULL;
8362 if (single_imm_use (lhs, &dummy, &use_stmt)
8363 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8364 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8367 c1 = VEC_WIDEN_MULT_LO_EXPR;
8368 c2 = VEC_WIDEN_MULT_HI_EXPR;
8381 case VEC_WIDEN_MULT_EVEN_EXPR:
8382 /* Support the recursion induced just above. */
8383 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8384 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8387 case WIDEN_LSHIFT_EXPR:
8388 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8389 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8393 c1 = VEC_UNPACK_LO_EXPR;
8394 c2 = VEC_UNPACK_HI_EXPR;
8398 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8399 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8402 case FIX_TRUNC_EXPR:
8403 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8404 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8405 computing the operation. */
8412 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8415 if (code == FIX_TRUNC_EXPR)
8417 /* The signedness is determined from output operand. */
8418 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8419 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8423 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8424 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8427 if (!optab1 || !optab2)
8430 vec_mode = TYPE_MODE (vectype);
8431 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8432 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8438 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8439 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8442 /* Check if it's a multi-step conversion that can be done using intermediate
8445 prev_type = vectype;
8446 prev_mode = vec_mode;
8448 if (!CONVERT_EXPR_CODE_P (code))
8451 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8452 intermediate steps in promotion sequence. We try
8453 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
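/* Cross-reference: the outputs of supportable_widening_operation (CODE1,
   CODE2, MULTI_STEP_CVT and INTERM_TYPES) are consumed by
   vectorizable_conversion, which emits the actual widening statements via
   vect_create_vectorized_promotion_stmts.  */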
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps
   in case of a multi-step conversion (like int->short->char - in that
   case MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
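/* For illustration (sizes are a sketch; the real modes are target
   dependent): narrowing int to char with 128-bit vectors proceeds as
       two V4SI --VEC_PACK_TRUNC_EXPR--> one V8HI
       two V8HI --VEC_PACK_TRUNC_EXPR--> one V16QI
   so CODE1 is VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and INTERM_TYPES
   holds the short vector type.  */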
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }
  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;
  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For a multi-step FIX_TRUNC_EXPR, prefer a signed float-to-integer
     conversion over an unsigned one, as an unsigned FIX_TRUNC_EXPR is
     often more costly than the signed variant.  */
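  /* E.g. (a sketch, not tied to any target): for float -> unsigned char,
     going through a signed intermediate (float -> int -> unsigned char)
     is valid because scalar results are only defined where the value fits
     in the final type anyway, and the signed patterns are usually the
     cheaper ones; the block below arranges exactly this switch.  */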
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && ((icode2 = optab_handler (interm_optab, vec_mode))
	      != CODE_FOR_nothing)
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
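  /* Each loop iteration below adds one VEC_PACK_TRUNC_EXPR step: it checks
     that the current optab narrows PREV_MODE into INTERMEDIATE_MODE and
     that a further pack-trunc handler exists on INTERMEDIATE_MODE.  E.g.
     (illustrative) int -> short -> char needs one such iteration.  */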
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode))
	      == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}