/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfglayout.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Extract the location of the basic block in the source code.
   Return the basic block location if it can be found, and UNKNOWN_LOC
   otherwise.  */

find_bb_location (basic_block bb)
  gimple_stmt_iterator si;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    if (gimple_location (stmt) != UNKNOWN_LOC)
      return gimple_location (stmt);
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

vect_free_slp_tree (slp_tree node)
  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

/* Free the memory allocated for the SLP instance.  */

vect_free_slp_instance (slp_instance instance)
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */
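/* Illustrative example (the names below are made up for exposition): for
   the group of two isomorphic stmts

     a0 = b0 + c0;
     a1 = b1 + c1;

   the first stmt records def-type/type info for {b0, c0}; for the second
   stmt this function checks that b1 and c1 have matching def types and
   compatible types, and collects their def stmts in DEF_STMTS0 and
   DEF_STMTS1, respectively.  */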
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only.  */
  for (i = 0; i < number_of_oprnds; i++)
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
          || (!def_stmt && dt[i] != vect_constant_def))
          if (vect_print_dump_info (REPORT_SLP))
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         same pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
          if (!*first_stmt_dt0)
          if (i == 1 && !*first_stmt_dt1)
          else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
              if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "Build SLP failed: some of the stmts"
                           " are in a pattern, and others are not ");
                  print_generic_expr (vect_dump, oprnd, TDF_SLIM);

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
      switch (gimple_code (def_stmt))
          def[i] = gimple_phi_result (def_stmt);
          def[i] = gimple_assign_lhs (def_stmt);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "unsupported defining stmt: ");
      if (!*first_stmt_dt0)
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
            *first_stmt_def0_type = TREE_TYPE (def[i]);
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for
               loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
            vect_model_store_cost (stmt_info, ncopies_for_cost, false,

          if (!*first_stmt_dt1 && i == 1)
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
                *first_stmt_def1_type = TREE_TYPE (def[i]);
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                      *first_stmt_const_oprnd = oprnd;
        /* Not the first stmt of the group; check that the def-stmt/s match
           the def-stmt/s of the first stmt.  Allow different definition
           types for reduction chains: the first stmt must be a
           vect_reduction_def (a phi node), and the rest
           vect_internal_def.  */
            && ((*first_stmt_dt0 != dt[i]
                 && !(*first_stmt_dt0 == vect_reduction_def
                      && dt[i] == vect_internal_def))
                || (*first_stmt_def0_type && def[0]
                    && !types_compatible_p (*first_stmt_def0_type,
                                            TREE_TYPE (def[0])))))
            && ((*first_stmt_dt1 != dt[i]
                 && !(*first_stmt_dt1 == vect_reduction_def
                      && dt[i] == vect_internal_def))
                || (*first_stmt_def1_type && def[1]
                    && !types_compatible_p (*first_stmt_def1_type,
                                            TREE_TYPE (def[1])))))
            && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),

          if (i != number_of_oprnds - 1)
            different_types = true;
          if (is_gimple_assign (stmt)
              && (rhs_code = gimple_assign_rhs_code (stmt))
              && TREE_CODE_CLASS (rhs_code) == tcc_binary
              && commutative_tree_code (rhs_code)
              && *first_stmt_dt0 == dt[1]
              && *first_stmt_dt1 == dt[0]
              && !(*first_stmt_def0_type
                   && !types_compatible_p (*first_stmt_def0_type,
              && !(*first_stmt_def1_type
                   && !types_compatible_p (*first_stmt_def1_type,
                                           TREE_TYPE (def[0]))))
              if (vect_print_dump_info (REPORT_SLP))
                  fprintf (vect_dump, "Swapping operands of ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

              swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                  gimple_assign_rhs2_ptr (stmt));

              if (vect_print_dump_info (REPORT_SLP))
                fprintf (vect_dump, "Build SLP failed: different types ");
      /* Check the types of the definitions.  */
        case vect_constant_def:
        case vect_external_def:

        case vect_internal_def:
        case vect_reduction_def:
          if ((i == 0 && !different_types) || (i == 1 && different_types))
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);

          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def[i], TDF_SLIM);
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */
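/* Illustrative sketch (for exposition only): for the group of strided
   stores

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   the root node holds the two stores, its left child holds the two loads
   from b, and its right child the two loads from c.  The recursion stops
   at the two load nodes.  */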
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted)
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;
  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
      if (vect_print_dump_info (REPORT_SLP))
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
          if (vect_print_dump_info (REPORT_SLP))
                "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
          if (vect_print_dump_info (REPORT_SLP))
                "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
          if (vect_print_dump_info (REPORT_SLP))
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
            vectorization_factor = *max_nunits;

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
        rhs_code = gimple_assign_rhs_code (stmt);
      /* Check the operation.  */
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
              if (vect_print_dump_info (REPORT_SLP))
                    "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
              if (vect_print_dump_info (REPORT_SLP))
                    "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
          if (REFERENCE_CLASS_P (lhs))
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                &pattern0, &pattern1))

              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                  if (vect_print_dump_info (REPORT_SLP))
                      fprintf (vect_dump, "Build SLP failed: strided "
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

              /* Check that the size of the interleaved loads group is not
                 greater than the SLP group size.  */
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                  if (vect_print_dump_info (REPORT_SLP))
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              /* Check that there are no loads from different interleaving
                 chains in the same node.  The only exception is complex
                 numbers.  */
              if (prev_first_load != first_load
                  && rhs_code != REALPART_EXPR
                  && rhs_code != IMAGPART_EXPR)
                  if (vect_print_dump_info (REPORT_SLP))
                      fprintf (vect_dump, "Build SLP failed: different "
                               "interleaving chains in one node ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

                prev_first_load = first_load;

              if (first_load == stmt)
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                      if (vect_print_dump_info (REPORT_SLP))
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
        } /* Strided access.  */
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

              /* FORNOW: Non-strided loads are not supported.  */

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
              if (vect_print_dump_info (REPORT_SLP))
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            &pattern0, &pattern1))

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
      VEC_safe_push (slp_tree, heap, *loads, *node);
          *loads_permuted = true;
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)

      /* We don't check complex-number chains here; we set LOADS_PERMUTED
         so that they are checked later, in
         vect_supported_load_permutation_p.  */
      if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
        *loads_permuted = true;
  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor, loads_permuted))
      SLP_TREE_LEFT (*node) = left_node;

  if (first_stmt_dt1 == vect_internal_def)
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor, loads_permuted))
      SLP_TREE_RIGHT (*node) = right_node;
vect_print_slp_tree (slp_tree node)
  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

vect_mark_slp_stmts_relevant (slp_tree node)
  stmt_vec_info stmt_info;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

vect_supported_slp_permutation_p (slp_instance instance)
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
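  /* For example (illustrative): if the place in the chain recorded for
     three load nodes is 2, 0 and 1, respectively, then node 0 is stored
     in TMP_LOADS[2], node 1 in TMP_LOADS[0] and node 2 in TMP_LOADS[1],
     i.e., the nodes end up ordered by their position in the interleaving
     chain.  */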
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
          if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "Build SLP failed: unsupported data "
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);

      tmp_loads[index] = load;

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
/* Rearrange the statements of NODE according to PERMUTATION.  */

vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */
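/* Illustrative example (made up for exposition): for the instance

     a[0] = b[1];
     a[1] = b[0];

   with GROUP_SIZE 2, the loads access elements 1 and 0 of the chain of
   loads from b, so the recorded indices are {1, 0} relative to the order
   of the stores to a.  */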
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */

  if (vect_print_dump_info (REPORT_SLP))
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
  /* Complex operands can be swapped as follows:
       real_c = real_b + real_a;
       imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
      FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
          if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
              if (complex_numbers != 2)
              other_complex_node = VEC_index (slp_tree,
                                              SLP_INSTANCE_LOADS (slp_instn), k);
              other_node_first = VEC_index (gimple,
                                 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

              if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);

  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance; not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
          for (j = i * group_size; j < i * group_size + group_size; j++)
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                  bad_permutation = true;

          if (bad_permutation)

      if (!bad_permutation)
          /* Check that the loads in the first sequence are different and
             there are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                  bad_permutation = true;

              SET_BIT (load_index, first_group_load_index);

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                  bad_permutation = true;

          sbitmap_free (load_index);
      if (!bad_permutation)
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
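  /* E.g. (illustrative), the basic block group

       x = b[1];  y = b[2];

     is acceptable here even though it starts at element 1 of the chain
     b[0]..b[3], because the two loads form a contiguous subchain.  */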
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                  bad_permutation = true;

              if (j != 0 && next_load != load)
                  bad_permutation = true;

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));

      if (bad_permutation)
  /* Check that the alignment of the first load in every subchain, i.e.,
     the first statement in every load node, is supported.  */
  if (!bad_permutation)
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
          first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                  if (vect_print_dump_info (REPORT_SLP))
                      fprintf (vect_dump, "unsupported unaligned load ");
                      print_gimple_stmt (vect_dump, first_load, 0,
                  bad_permutation = true;

      if (!bad_permutation)
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's a reduction).  */
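  /* E.g. (illustrative), for GROUP_SIZE 2 the supported shape is
     {0, 0, 1, 1}: GROUP_SIZE consecutive copies of index 0, followed by
     GROUP_SIZE copies of index 1; see the checks below.  */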
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))

  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
          if (i != j * group_size && next != prev)

      if (TEST_BIT (load_index, prev))

      SET_BIT (load_index, prev);

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

vect_find_first_load_in_slp_instance (slp_instance instance)
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);
/* Find the last store in SLP INSTANCE.  */

vect_find_last_store_in_slp_instance (slp_instance instance)
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
    last_store = get_later_stmt (store, last_store);
/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));

      if (vect_print_dump_info (REPORT_SLP))
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    vectorization_factor = nunits;
  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
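  /* E.g., with NUNITS 4 and GROUP_SIZE 2 this gives lcm (4, 2) / 2 = 2:
     the group must be unrolled twice to fill a vector; with GROUP_SIZE 8
     it gives lcm (4, 8) / 8 = 1, i.e., no unrolling is needed
     (illustrative numbers).  */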
  if (unrolling_factor != 1 && !loop_vinfo)
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));

      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
        VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;
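  /* E.g. (illustrative), NUNITS 4, GROUP_SIZE 8, UNROLLING_FACTOR 1 gives
     1 * 8 / 4 = 2 vector stmts, while NUNITS 4, GROUP_SIZE 2,
     UNROLLING_FACTOR 2 gives 2 * 2 / 4 = 1.  */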
  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted))
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)

      if (unrolling_factor != 1 && !loop_vinfo)
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

          if (!vect_supported_load_permutation_p (new_instance, group_size,
              if (vect_print_dump_info (REPORT_SLP))
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);

              vect_free_slp_instance (new_instance);

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))

  if (bb_vinfo && !ok)
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))

      /* Don't try to vectorize SLP reductions if a reduction chain was
         detected.  */

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
/* For each possible SLP instance decide whether to SLP it and calculate the
   overall unrolling factor needed to SLP the loop.  Return TRUE if decided
   to SLP at least one instance.  */

vect_make_slp_decision (loop_vec_info loop_vinfo)
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances.  Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as
   HYBRID.  */
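/* Illustrative example (made up for exposition): in

     x = b[i] + c[i];
     a[i] = x;           <-- pure SLP
     s += x;             <-- not SLPed

   the stmt computing X feeds a stmt outside the SLP instance, so it must
   also be vectorized by the loop-based vectorizer and is marked HYBRID.  */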
vect_detect_hybrid_slp_stmts (slp_tree node)
  imm_use_iterator imm_iter;
  stmt_vec_info stmt_vinfo;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
/* Find stmts that must be both vectorized and SLPed.  */

vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

new_bb_vec_info (basic_block bb)
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

destroy_bb_vec_info (bb_vec_info bb_vinfo)
  gimple_stmt_iterator si;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* Free stmt_vec_info.  */
      free_stmt_vec_info (stmt);

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
/* Analyze statements in SLP instances of the basic block.  Return TRUE if
   the operations are supported.  */

vect_slp_analyze_operations (bb_vec_info bb_vinfo)
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);

  if (!VEC_length (slp_instance, slp_instances))
/* Check if loads and stores are mixed in the basic block (in that
   case, if we are not sure that the accesses differ, we can't vectorize the
   basic block).  Also return FALSE in case there is a statement marked as
   not vectorizable.  */

vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
      stmt = gsi_stmt (si);

      /* We can't allow statements that were not analyzed, since they may
         contain unknown data dependences.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)

      if (!DR_IS_READ (dr))
        detected_store = true;
/* Check if vectorization of the basic block is profitable.  */

vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))

      if (STMT_VINFO_DATA_REF (stmt_info))
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;

  if (vect_print_dump_info (REPORT_COST))
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
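  /* E.g. (illustrative numbers), with a vector inside cost of 2, a vector
     outside cost of 1 and a scalar cost of 4, we have 2 + 1 < 4 and the
     basic block is considered profitable to vectorize.  */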
/* Check if the basic block can be vectorized.  */

vect_slp_analyze_bb_1 (basic_block bb)
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  bb_vinfo = new_bb_vec_info (bb);

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "

      destroy_bb_vec_info (bb_vinfo);

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "

      destroy_bb_vec_info (bb_vinfo);

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "

      destroy_bb_vec_info (bb_vinfo);

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "

      destroy_bb_vec_info (bb_vinfo);

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "

      destroy_bb_vec_info (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

  if (!vect_slp_analyze_operations (bb_vinfo))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "

      destroy_bb_vec_info (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
vect_slp_analyze_bb (basic_block bb)
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

      bb_vinfo = vect_slp_analyze_bb_1 (bb);

      destroy_bb_vec_info (bb_vinfo);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "***** Re-trying analysis with "
                 "vector size %d\n", current_vector_size);
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */

vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
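  /* E.g. (illustrative numbers), an instance with unrolling factor 2 in a
     loop with VF 4 is created vf / unrolling-factor = 2 times, so its
     inside cost is scaled by 2.  */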
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int j, number_of_places_left_in_vector;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction () for details.  */
        case WIDEN_SUM_EXPR:
          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
            neutral_op = build_real (TREE_TYPE (op), dconst0);
            neutral_op = build_int_cst (TREE_TYPE (op), 0);

          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
            neutral_op = build_real (TREE_TYPE (op), dconst1);
            neutral_op = build_int_cst (TREE_TYPE (op), 1);

          neutral_op = build_int_cst (TREE_TYPE (op), -1);

          def_stmt = SSA_NAME_DEF_STMT (op);
          loop = (gimple_bb (stmt))->loop_father;
          neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                              loop_preheader_edge (loop));
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
      op = gimple_assign_rhs1 (stmt);

  if (CONSTANT_CLASS_P (op))

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);
  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     is 2 in this case.)

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
            op = gimple_assign_rhs1 (stmt);
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              /* Get the def before the loop.  In a reduction chain we have
                 only one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);
          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
              number_of_places_left_in_vector = nunits;

                vec_cst = build_vector (vector_type, t);
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
      tree neutral_vec = NULL;

          neutral_vec = build_vector_from_val (vector_type, neutral_op);

        VEC_quick_push (tree, *vec_oprnds, neutral_vec);

      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
  gimple vec_def_stmt;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain
   scalar.  */
2253 vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
2254 VEC (tree,heap) **vec_oprnds0,
2255 VEC (tree,heap) **vec_oprnds1, int reduc_index)
2258 enum tree_code code;
2259 int number_of_vects;
2260 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2262 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance (), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type () for details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }
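
  /* A hypothetical example of the adjustment above: for stmts computing
     'short_dest = (short) int_src', the wider int RHS needs twice as many
     vectors as the short LHS, so with lhs_size_unit == 2 and
     rhs_size_unit == 4 the LHS-based count is doubled.  */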
  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
    return;
  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
                               -1);
}
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed
   in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
   shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE,
   where the created stmts must be inserted.  */
static void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
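
  /* E.g. (illustrative values): with 4 vectorized stmts for NODE and
     NCOPIES == 2, STRIDE == 2; copy 0 reads the DR_CHAIN entries at
     FIRST_VEC_INDX/SECOND_VEC_INDX, and copy 1 reads the entries shifted
     by 2.  */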
  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
                                                 first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;
  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values
     in the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to
             the next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }
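
  /* Illustration of the adjustment above (assumed values): with
     MASK_NUNITS == 4, a running mask {4,5,6} reaching element 8 refers to
     a third vector; since no element so far needs the first vector, the
     code above rebases the mask to {0,1,2} with current element 4 and
     continues with the second and third vectors only.  */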
  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance,
                              bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num,
      vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  if (!can_vec_perm_expr_p (vectype, NULL_TREE))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type
    = lang_hooks.types.type_for_size
    (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = (int *) xmalloc (sizeof (int) * nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
  /* The number of vector stmts to generate based only on the
     SLP_NODE_INSTANCE unrolling factor.  */
  orig_vec_stmts_num = group_size
    * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* Number of copies is determined by the final vectorization factor
     relative to the SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
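
  /* Example with assumed values: GROUP_SIZE == 3, an unrolling factor of 4
     and NUNITS == 4 give ORIG_VEC_STMTS_NUM == 3, so more than one input
     vector is involved; if the loop VF is 8, NCOPIES == 8 / 4 == 2 and
     every mask is reused for a second set of input vectors.  */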
  /* Generate permutation masks for every NODE.  Number of masks for each
     NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = i + j * group_size;
              if (!vect_get_mask_element (stmt, first_mask_element, 0,
                                          nunits, only_one_vec, index,
                                          mask, &current_mask_element,
                                          &need_next_vector,
                                          &number_of_mask_fixes, &mask_fixed,
                                          &needs_first_vector))
                return false;

              mask[index++] = current_mask_element;

              if (index == nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type,
                                              mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!can_vec_perm_expr_p (vectype, mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by the
     vector size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
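
  /* For example (values assumed for illustration): with VF == 4,
     GROUP_SIZE == 2 and NUNITS == 4, one vector iteration covers
     4 * 2 == 8 scalars, so VEC_STMTS_SIZE == 8 / 4 == 2 vector stmts
     per node.  */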
  /* In case of load permutation we have to allocate vectorized statements
     for all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_INSTANCE_LOAD_PERMUTATION (instance))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);
  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }
  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}
/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is
         reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}