Backport from GCC mainline.
[platform/upstream/linaro-gcc.git] / gcc / tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
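      /* The condition of a COND_EXPR is typically an embedded
         comparison, which contributes two operands rather than one,
         hence the extra operand slot.  */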
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}


/* This structure is used during the creation of an SLP tree.  Each
   instance describes one operand position shared by all the scalar
   stmts of an SLP node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
  tree first_op_type;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
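/* For example, for the chain  a[0], a[1], a[3]  (with a one-element gap)
   the places returned are 0, 1 and 3 respectively.  */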

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  Return -1 on a fatal error, 1 on
   a mismatch that could be corrected by swapping operands of the operation,
   and 0 if everything is OK.  */
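/* For example, in the group  { x0 = 1 + b0,  x1 = c1 + 1 }  the constant
   and internal defs of the second stmt only line up with those of the
   first stmt after swapping its operands, which the retry logic below
   performs for commutative codes.  */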

static int
vect_get_and_check_slp_defs (vec_info *vinfo,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          commutative = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          if (i == 0 || i == 1)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
                                  swapped ? !i : i);
          else
            oprnd = gimple_op (stmt, first_op_idx + i - 1);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in an SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  return 0;
}


/* Check whether the scalar stmts STMTS are isomorphic and of supported
   types of operation, as required for combining them into one SLP node.
   Return true if they are, otherwise return false and indicate in
   *MATCHES which stmts are not isomorphic to the first one.  If
   MATCHES[0] is false then this indicates that the comparison could not
   be carried out or that the stmts will never be vectorized by SLP.  */
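/* For example, the pair  a[0] = b[0] + c[0]  and  a[1] = b[1] - c[1]
   mixes two operations, but it is still accepted via the two-operator
   (PLUS_EXPR/MINUS_EXPR) path below, which sets *TWO_OPERATORS.  */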

static bool
vect_build_slp_tree_1 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
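      /* E.g. a group of two ints cannot fill a V4SI vector without
         unrolling, which is only possible in loop vectorization.  */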
      if (is_a <bb_vec_info> (vinfo)
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
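  /* E.g. for GROUP_SIZE 2 and V4SI the selector built below would be
     { 0, 5, 2, 7 }, taking even elements from one result vector and odd
     elements from the other.  */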
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Recursively build an SLP tree for the group of stmts STMTS.
   Fail (and return NULL) if the def-stmts are not isomorphic, require
   data permutation, or are of unsupported types of operation.
   Otherwise return the built SLP node.  MATCHES records which stmts
   matched the first one; MATCHES[0] set to false signals a fatal
   mismatch.  */

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  bool two_operators = false;
  if (!vect_build_slp_tree_1 (vinfo,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info))
        {
        case 0:
          break;
        case -1:
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        case 1:
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build this node up from scalars instead.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ???  Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we are analyzing a basic
         block, simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ???  This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ???  Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operands zero
         and one can be commuted, try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j])) != 0)
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If we have all children of child built up from scalars then
                 just throw that away and build this node up from scalars
                 instead.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ???  Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump an SLP tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */
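/* E.g. with PERMUTATION { 1, 0, 3, 2 } the scalar stmts { s0, s1, s2, s3 }
   become { s1, s0, s3, s2 }.  */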

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
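/* E.g. if every load node of the instance has load permutation { 1, 0 },
   we can instead swap the two scalar stmts in every node of the instance
   and drop the permutations entirely.  */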

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  sbitmap load_index;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        {
          sbitmap_free (load_index);
          return false;
        }
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (unrolling_factor == 1
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */
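/* E.g. in basic-block SLP a node loading a[1], a[2] out of the chain
   a[0] ... a[3] forms a subchain and needs no permutation, whereas
   loads in reverse order do require one.  */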

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;

  return true;
}


1455 /* Find the last scalar stmt of the SLP group represented by NODE.  */
1456
1457 gimple *
1458 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1459 {
1460   gimple *last = NULL, *stmt;
1461
1462   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1463     {
1464       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1465       if (is_pattern_stmt_p (stmt_vinfo))
1466         last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1467       else
1468         last = get_later_stmt (stmt, last);
1469     }
1470
1471   return last;
1472 }
1473
1474 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1475
1476 static void
1477 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1478                          stmt_vector_for_cost *prologue_cost_vec,
1479                          stmt_vector_for_cost *body_cost_vec,
1480                          unsigned ncopies_for_cost)
1481 {
1482   unsigned i, j;
1483   slp_tree child;
1484   gimple *stmt;
1485   stmt_vec_info stmt_info;
1486   tree lhs;
1487
1488   /* Recurse down the SLP tree.  */
1489   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1490     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1491       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1492                                body_cost_vec, ncopies_for_cost);
1493
1494   /* Look at the first scalar stmt to determine the cost.  */
1495   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1496   stmt_info = vinfo_for_stmt (stmt);
1497   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1498     {
1499       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1500         vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1501                                vect_uninitialized_def,
1502                                node, prologue_cost_vec, body_cost_vec);
1503       else
1504         {
1505           gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1506           if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1507             {
1508               /* If the load is permuted then the alignment is determined by
1509                  the first group element not by the first scalar stmt DR.  */
1510               stmt = GROUP_FIRST_ELEMENT (stmt_info);
1511               stmt_info = vinfo_for_stmt (stmt);
1512               /* Record the cost for the permutation.  */
1513               record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1514                                 stmt_info, 0, vect_body);
1515               /* And adjust the number of loads performed.  */
1516               unsigned nunits
1517                 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1518               ncopies_for_cost
1519                 = (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
1520                    + nunits - 1) / nunits;
1521               ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
1522             }
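          /* For example, GROUP_SIZE 8, GROUP_GAP 0 and nunits 4 give
             (8 - 0 + 3) / 4 == 2 vector loads per copy, then scaled by
             the instance unrolling factor.  */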
1523           /* Record the cost for the vector loads.  */
1524           vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1525                                 node, prologue_cost_vec, body_cost_vec);
1526           return;
1527         }
1528     }
1529   else
1530     {
1531       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1532                         stmt_info, 0, vect_body);
1533       if (SLP_TREE_TWO_OPERATORS (node))
1534         {
1535           record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1536                             stmt_info, 0, vect_body);
1537           record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1538                             stmt_info, 0, vect_body);
1539         }
1540     }
1541
1542   /* Push SLP node def-type to stmts.  */
1543   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1544     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1545       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1546         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
1547
1548   /* Scan operands and account for prologue cost of constants/externals.
1549      ???  This over-estimates cost for multiple uses and should be
1550      re-engineered.  */
1551   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1552   lhs = gimple_get_lhs (stmt);
1553   for (i = 0; i < gimple_num_ops (stmt); ++i)
1554     {
1555       tree op = gimple_op (stmt, i);
1556       gimple *def_stmt;
1557       enum vect_def_type dt;
1558       if (!op || op == lhs)
1559         continue;
1560       if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
1561         {
1562           /* Without looking at the actual initializer a vector of
1563              constants can be implemented as load from the constant pool.
1564              ???  We need to pass down stmt_info for a vector type
1565              even if it points to the wrong stmt.  */
1566           if (dt == vect_constant_def)
1567             record_stmt_cost (prologue_cost_vec, 1, vector_load,
1568                               stmt_info, 0, vect_prologue);
1569           else if (dt == vect_external_def)
1570             record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1571                               stmt_info, 0, vect_prologue);
1572         }
1573     }
1574
1575   /* Restore stmt def-types.  */
1576   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1577     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1578       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1579         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
1580 }
1581
1582 /* Compute the cost for the SLP instance INSTANCE.  */
1583
1584 static void
1585 vect_analyze_slp_cost (slp_instance instance, void *data)
1586 {
1587   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1588   unsigned ncopies_for_cost;
1589   stmt_info_for_cost *si;
1590   unsigned i;
1591
1592   if (dump_enabled_p ())
1593     dump_printf_loc (MSG_NOTE, vect_location,
1594                      "=== vect_analyze_slp_cost ===\n");
1595
1596   /* Calculate the number of vector stmts to create based on the unrolling
1597      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1598      GROUP_SIZE / NUNITS otherwise).  */
1599   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1600   slp_tree node = SLP_INSTANCE_TREE (instance);
1601   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1602   /* Adjust the group_size by the vectorization factor which is always one
1603      for basic-block vectorization.  */
1604   if (STMT_VINFO_LOOP_VINFO (stmt_info))
1605     group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1606   unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1607   /* For reductions look at a reduction operand in case the reduction
1608      operation is widening like DOT_PROD or SAD.  */
1609   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1610     {
1611       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1612       switch (gimple_assign_rhs_code (stmt))
1613         {
1614         case DOT_PROD_EXPR:
1615         case SAD_EXPR:
1616           nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1617                                 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1618           break;
1619         default:;
1620         }
1621     }
1622   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
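  /* For example, nunits == 4 and group_size == 6 give
     least_common_multiple (4, 6) == 12 and thus 3 vector stmts.  */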
1623
1624   prologue_cost_vec.create (10);
1625   body_cost_vec.create (10);
1626   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1627                            &prologue_cost_vec, &body_cost_vec,
1628                            ncopies_for_cost);
1629
1630   /* Record the prologue costs, which were delayed until we were
1631      sure that SLP was successful.  */
1632   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1633     {
1634       struct _stmt_vec_info *stmt_info
1635         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1636       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1637                             si->misalign, vect_prologue);
1638     }
1639
1640   /* Record the instance's instructions in the target cost model.  */
1641   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1642     {
1643       struct _stmt_vec_info *stmt_info
1644         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1645       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1646                             si->misalign, vect_body);
1647     }
1648
1649   prologue_cost_vec.release ();
1650   body_cost_vec.release ();
1651 }
1652
1653 /* Split a group of stores, currently beginning at FIRST_STMT, into two groups:
1654    the first (still beginning at FIRST_STMT) contains the first GROUP1_SIZE
1655    stmts (stores are consecutive, so these are also the first GROUP1_SIZE
1656    stores), the second contains the remainder.
1657    Return the first stmt in the second group.  */
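/* For example, splitting a group of 8 consecutive stores with
   GROUP1_SIZE == 4 leaves { S1..S4 } as the first group and makes S5 the
   head of { S5..S8 }; the second group's leading GROUP_GAP becomes the
   original leading gap plus 4 (it must skip the first subgroup) and the
   first group's GROUP_GAP grows by 4 (it must skip the second subgroup
   when wrapping to the next iteration).  */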
1658
1659 static gimple *
1660 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1661 {
1662   stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1663   gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1664   gcc_assert (group1_size > 0);
1665   int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1666   gcc_assert (group2_size > 0);
1667   GROUP_SIZE (first_vinfo) = group1_size;
1668
1669   gimple *stmt = first_stmt;
1670   for (unsigned i = group1_size; i > 1; i--)
1671     {
1672       stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1673       gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1674     }
1675   /* STMT is now the last element of the first group.  */
1676   gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1677   GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1678
1679   GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1680   for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1681     {
1682       GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1683       gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1684     }
1685
1686   /* For the second group, the GROUP_GAP is that before the original group,
1687      plus skipping over the first group (GROUP1_SIZE stmts).  */
1688   GROUP_GAP (vinfo_for_stmt (group2)) =
1689     GROUP_GAP (first_vinfo) + group1_size;
1690
1691   /* GROUP_GAP of the first group now has to skip over the second group too.  */
1692   GROUP_GAP (first_vinfo) += group2_size;
1693
1694   if (dump_enabled_p ())
1695     dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1696                      group1_size, group2_size);
1697
1698   return group2;
1699 }
1700
1701 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1702    vect_build_slp_tree to build a tree of packed stmts if possible.
1703    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1704
1705 static bool
1706 vect_analyze_slp_instance (vec_info *vinfo,
1707                            gimple *stmt, unsigned max_tree_size)
1708 {
1709   slp_instance new_instance;
1710   slp_tree node;
1711   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1712   unsigned int unrolling_factor = 1, nunits;
1713   tree vectype, scalar_type = NULL_TREE;
1714   gimple *next;
1715   unsigned int i;
1716   unsigned int max_nunits = 0;
1717   vec<slp_tree> loads;
1718   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1719   vec<gimple *> scalar_stmts;
1720
1721   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1722     {
1723       if (dr)
1724         {
1725           scalar_type = TREE_TYPE (DR_REF (dr));
1726           vectype = get_vectype_for_scalar_type (scalar_type);
1727         }
1728       else
1729         {
1730           gcc_assert (is_a <loop_vec_info> (vinfo));
1731           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1732         }
1733
1734       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1735     }
1736   else
1737     {
1738       gcc_assert (is_a <loop_vec_info> (vinfo));
1739       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1740       group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1741     }
1742
1743   if (!vectype)
1744     {
1745       if (dump_enabled_p ())
1746         {
1747           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1748                            "Build SLP failed: unsupported data-type ");
1749           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1750           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1751         }
1752
1753       return false;
1754     }
1755   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1756
1757   /* Calculate the unrolling factor.  */
1758   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
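  /* For example, nunits == 4 and group_size == 6 give an unrolling
     factor of lcm (4, 6) / 6 == 2.  */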
1759   if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
1760     {
1761       if (dump_enabled_p ())
1762         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1763                          "Build SLP failed: unrolling required in basic"
1764                          " block SLP\n");
1765
1766       return false;
1767     }
1768
1769   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1770   scalar_stmts.create (group_size);
1771   next = stmt;
1772   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1773     {
1774       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1775       while (next)
1776         {
1777           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1778               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1779             scalar_stmts.safe_push (
1780                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1781           else
1782             scalar_stmts.safe_push (next);
1783           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1784         }
1785       /* Mark the first element of the reduction chain as reduction to properly
1786          transform the node.  In the reduction analysis phase only the last
1787          element of the chain is marked as reduction.  */
1788       if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1789         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1790     }
1791   else
1792     {
1793       /* Collect reduction statements.  */
1794       vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1795       for (i = 0; reductions.iterate (i, &next); i++)
1796         scalar_stmts.safe_push (next);
1797     }
1798
1799   loads.create (group_size);
1800
1801   /* Build the tree for the SLP instance.  */
1802   bool *matches = XALLOCAVEC (bool, group_size);
1803   unsigned npermutes = 0;
1804   if ((node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
1805                                    &max_nunits, &loads, matches, &npermutes,
1806                                    NULL, max_tree_size)) != NULL)
1807     {
1808       /* Calculate the unrolling factor based on the smallest type.  */
1809       if (max_nunits > nunits)
1810         unrolling_factor = least_common_multiple (max_nunits, group_size)
1811                            / group_size;
1812
1813       if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
1814         {
1815           if (dump_enabled_p ())
1816             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1817                              "Build SLP failed: unrolling required in basic"
1818                              " block SLP\n");
1819           vect_free_slp_tree (node);
1820           loads.release ();
1821           return false;
1822         }
1823
1824       /* Create a new SLP instance.  */
1825       new_instance = XNEW (struct _slp_instance);
1826       SLP_INSTANCE_TREE (new_instance) = node;
1827       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1828       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1829       SLP_INSTANCE_LOADS (new_instance) = loads;
1830
1831       /* Compute the load permutation.  */
1832       slp_tree load_node;
1833       bool loads_permuted = false;
1834       FOR_EACH_VEC_ELT (loads, i, load_node)
1835         {
1836           vec<unsigned> load_permutation;
1837           int j;
1838           gimple *load, *first_stmt;
1839           bool this_load_permuted = false;
1840           load_permutation.create (group_size);
1841           first_stmt = GROUP_FIRST_ELEMENT
1842               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1843           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1844             {
1845               int load_place
1846                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1847               gcc_assert (load_place != -1);
1848               if (load_place != j)
1849                 this_load_permuted = true;
1850               load_permutation.safe_push (load_place);
1851             }
1852           if (!this_load_permuted
1853               /* The load requires permutation when unrolling exposes
1854                  a gap either because the group is larger than the SLP
1855                  group-size or because there is a gap between the groups.  */
1856               && (unrolling_factor == 1
1857                   || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1858                       && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
1859             {
1860               load_permutation.release ();
1861               continue;
1862             }
1863           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1864           loads_permuted = true;
1865         }
1866
1867       if (loads_permuted)
1868         {
1869           if (!vect_supported_load_permutation_p (new_instance))
1870             {
1871               if (dump_enabled_p ())
1872                 {
1873                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1874                                    "Build SLP failed: unsupported load "
1875                                    "permutation ");
1876                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1877                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1878                 }
1879               vect_free_slp_instance (new_instance);
1880               return false;
1881             }
1882         }
1883
1884       /* If the loads and stores can be handled with load/store-lane
1885          instructions, do not generate this SLP instance.  */
1886       if (is_a <loop_vec_info> (vinfo)
1887           && loads_permuted
1888           && dr && vect_store_lanes_supported (vectype, group_size))
1889         {
1890           slp_tree load_node;
1891           FOR_EACH_VEC_ELT (loads, i, load_node)
1892             {
1893               gimple *first_stmt = GROUP_FIRST_ELEMENT
1894                   (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1895               stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
1896               /* Use SLP for strided accesses (or if we can't use load-lanes).  */
1897               if (STMT_VINFO_STRIDED_P (stmt_vinfo)
1898                   || ! vect_load_lanes_supported
1899                         (STMT_VINFO_VECTYPE (stmt_vinfo),
1900                          GROUP_SIZE (stmt_vinfo)))
1901                 break;
1902             }
1903           if (i == loads.length ())
1904             {
1905               if (dump_enabled_p ())
1906                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1907                                  "Built SLP cancelled: can use "
1908                                  "load/store-lanes\n");
1909               vect_free_slp_instance (new_instance);
1910               return false;
1911             }
1912         }
1913
1914       vinfo->slp_instances.safe_push (new_instance);
1915
1916       if (dump_enabled_p ())
1917         {
1918           dump_printf_loc (MSG_NOTE, vect_location,
1919                            "Final SLP tree for instance:\n");
1920           vect_print_slp_tree (MSG_NOTE, vect_location, node);
1921         }
1922
1923       return true;
1924     }
1925
1926   /* Failed to SLP.  */
1927   /* Free the allocated memory.  */
1928   scalar_stmts.release ();
1929   loads.release ();
1930
1931   /* For basic block SLP, try to break the group up into multiples of the
1932      vector size.  */
1933   if (is_a <bb_vec_info> (vinfo)
1934       && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1935       && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1936     {
1937       /* We consider breaking the group only on VF boundaries from the existing
1938          start.  */
1939       for (i = 0; i < group_size; i++)
1940         if (!matches[i]) break;
1941
1942       if (i >= nunits && i < group_size)
1943         {
1944           /* Split into two groups at the first vector boundary before i.  */
1945           gcc_assert ((nunits & (nunits - 1)) == 0);
1946           unsigned group1_size = i & ~(nunits - 1);
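          /* For example, with nunits == 4 a first mismatch at i == 6
             rounds down to a first group of 4 stmts.  */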
1947
1948           gimple *rest = vect_split_slp_store_group (stmt, group1_size);
1949           bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
1950           /* If the first non-match was in the middle of a vector,
1951              skip the rest of that vector.  */
1952           if (group1_size < i)
1953             {
1954               i = group1_size + nunits;
1955               if (i < group_size)
1956                 rest = vect_split_slp_store_group (rest, nunits);
1957             }
1958           if (i < group_size)
1959             res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
1960           return res;
1961         }
1962       /* Even though the first vector did not all match, we might be able to SLP
1963          (some) of the remainder.  FORNOW ignore this possibility.  */
1964     }
1965
1966   return false;
1967 }
1968
1969
1970 /* Check if there are stmts in the loop that can be vectorized using SLP.
1971    Build SLP trees of packed scalar stmts if SLP is possible.  */
1972
1973 bool
1974 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
1975 {
1976   unsigned int i;
1977   gimple *first_element;
1978   bool ok = false;
1979
1980   if (dump_enabled_p ())
1981     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1982
1983   /* Find SLP sequences starting from groups of grouped stores.  */
1984   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
1985     if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
1986       ok = true;
1987
1988   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
1989     {
1990       if (loop_vinfo->reduction_chains.length () > 0)
1991         {
1992           /* Find SLP sequences starting from reduction chains.  */
1993           FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
1994               if (vect_analyze_slp_instance (vinfo, first_element,
1995                                              max_tree_size))
1996                 ok = true;
1997               else
1998                 return false;
1999
2000           /* Don't try to vectorize SLP reductions if reduction chain was
2001              detected.  */
2002           return ok;
2003         }
2004
2005       /* Find SLP sequences starting from groups of reductions.  */
2006       if (loop_vinfo->reductions.length () > 1
2007           && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2008                                         max_tree_size))
2009         ok = true;
2010     }
2011
2012   return true;
2013 }
2014
2015
2016 /* For each possible SLP instance decide whether to SLP it and calculate the
2017    overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
2018    to SLP at least one instance.  */
2019
2020 bool
2021 vect_make_slp_decision (loop_vec_info loop_vinfo)
2022 {
2023   unsigned int i, unrolling_factor = 1;
2024   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2025   slp_instance instance;
2026   int decided_to_slp = 0;
2027
2028   if (dump_enabled_p ())
2029     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2030                      "\n");
2031
2032   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2033     {
2034       /* FORNOW: SLP if you can.  */
2035       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2036         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2037
2038       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
2039          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2040          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
2041       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2042       decided_to_slp++;
2043     }
2044
2045   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2046
2047   if (decided_to_slp && dump_enabled_p ())
2048     dump_printf_loc (MSG_NOTE, vect_location,
2049                      "Decided to SLP %d instances. Unrolling factor %d\n",
2050                      decided_to_slp, unrolling_factor);
2051
2052   return (decided_to_slp > 0);
2053 }
2054
2055
2056 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2057    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
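/* For instance, if an SLP def t = a[i] + b[i] also feeds a non-SLP
   summation sum += t, the stmt computing t must additionally be handled
   by loop-based vectorization and is therefore marked hybrid.  */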
2058
2059 static void
2060 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2061 {
2062   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2063   imm_use_iterator imm_iter;
2064   gimple *use_stmt;
2065   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2066   slp_tree child;
2067   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2068   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2069   int j;
2070
2071   /* Propagate hybrid down the SLP tree.  */
2072   if (stype == hybrid)
2073     ;
2074   else if (HYBRID_SLP_STMT (stmt_vinfo))
2075     stype = hybrid;
2076   else
2077     {
2078       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
2079       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2080       /* If we get a pattern stmt here we have to use the LHS of the
2081          original stmt for immediate uses.  */
2082       if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2083           && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2084         stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2085       if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2086         FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
2087           {
2088             if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2089               continue;
2090             use_vinfo = vinfo_for_stmt (use_stmt);
2091             if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2092                 && STMT_VINFO_RELATED_STMT (use_vinfo))
2093               use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2094             if (!STMT_SLP_TYPE (use_vinfo)
2095                 && (STMT_VINFO_RELEVANT (use_vinfo)
2096                     || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2097                 && !(gimple_code (use_stmt) == GIMPLE_PHI
2098                      && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2099               {
2100                 if (dump_enabled_p ())
2101                   {
2102                     dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2103                                      "def in non-SLP stmt: ");
2104                     dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2105                   }
2106                 stype = hybrid;
2107               }
2108           }
2109     }
2110
2111   if (stype == hybrid
2112       && !HYBRID_SLP_STMT (stmt_vinfo))
2113     {
2114       if (dump_enabled_p ())
2115         {
2116           dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2117           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2118         }
2119       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2120     }
2121
2122   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2123     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2124       vect_detect_hybrid_slp_stmts (child, i, stype);
2125 }
2126
2127 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2128
2129 static tree
2130 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2131 {
2132   walk_stmt_info *wi = (walk_stmt_info *)data;
2133   struct loop *loopp = (struct loop *)wi->info;
2134
2135   if (wi->is_lhs)
2136     return NULL_TREE;
2137
2138   if (TREE_CODE (*tp) == SSA_NAME
2139       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2140     {
2141       gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2142       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2143           && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2144         {
2145           if (dump_enabled_p ())
2146             {
2147               dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2148               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2149             }
2150           STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2151         }
2152     }
2153
2154   return NULL_TREE;
2155 }
2156
2157 static tree
2158 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2159                           walk_stmt_info *)
2160 {
2161   /* If the stmt is in a SLP instance then this isn't a reason
2162      to mark use definitions in other SLP instances as hybrid.  */
2163   if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2164     *handled = true;
2165   return NULL_TREE;
2166 }
2167
2168 /* Find stmts that must be both vectorized and SLPed.  */
2169
2170 void
2171 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2172 {
2173   unsigned int i;
2174   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2175   slp_instance instance;
2176
2177   if (dump_enabled_p ())
2178     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2179                      "\n");
2180
2181   /* First walk all pattern stmts in the loop and mark defs of their uses
2182      as hybrid, because immediate uses in pattern stmts are not recorded.  */
2183   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2184     {
2185       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2186       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2187            gsi_next (&gsi))
2188         {
2189           gimple *stmt = gsi_stmt (gsi);
2190           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2191           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2192             {
2193               walk_stmt_info wi;
2194               memset (&wi, 0, sizeof (wi));
2195               wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2196               gimple_stmt_iterator gsi2
2197                 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2198               walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2199                                 vect_detect_hybrid_slp_1, &wi);
2200               walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2201                                vect_detect_hybrid_slp_2,
2202                                vect_detect_hybrid_slp_1, &wi);
2203             }
2204         }
2205     }
2206
2207   /* Then walk the SLP instance trees marking stmts with uses in
2208      non-SLP stmts as hybrid, also propagating hybrid down the
2209      SLP tree, collecting the above info on-the-fly.  */
2210   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2211     {
2212       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2213         vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2214                                       i, pure_slp);
2215     }
2216 }
2217
2218
2219 /* Create and initialize a new bb_vec_info struct for BB, as well as
2220    stmt_vec_info structs for all the stmts in it.  */
2221
2222 static bb_vec_info
2223 new_bb_vec_info (gimple_stmt_iterator region_begin,
2224                  gimple_stmt_iterator region_end)
2225 {
2226   basic_block bb = gsi_bb (region_begin);
2227   bb_vec_info res = NULL;
2228   gimple_stmt_iterator gsi;
2229
2230   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2231   res->kind = vec_info::bb;
2232   BB_VINFO_BB (res) = bb;
2233   res->region_begin = region_begin;
2234   res->region_end = region_end;
2235
2236   for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2237        gsi_next (&gsi))
2238     {
2239       gimple *stmt = gsi_stmt (gsi);
2240       gimple_set_uid (stmt, 0);
2241       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
2242     }
2243
2244   BB_VINFO_GROUPED_STORES (res).create (10);
2245   BB_VINFO_SLP_INSTANCES (res).create (2);
2246   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2247
2248   bb->aux = res;
2249   return res;
2250 }
2251
2252
2253 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2254    stmts in the basic block.  */
2255
2256 static void
2257 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2258 {
2259   slp_instance instance;
2260   unsigned i;
2261
2262   if (!bb_vinfo)
2263     return;
2264
2265   vect_destroy_datarefs (bb_vinfo);
2266   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2267   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2268   FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
2269     vect_free_slp_instance (instance);
2270   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2271   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2272
2273   for (gimple_stmt_iterator si = bb_vinfo->region_begin;
2274        gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
2275     {
2276       gimple *stmt = gsi_stmt (si);
2277       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2278
2279       if (stmt_info)
2280         /* Free stmt_vec_info.  */
2281         free_stmt_vec_info (stmt);
2282
2283       /* Reset region marker.  */
2284       gimple_set_uid (stmt, -1);
2285     }
2286
2287   BB_VINFO_BB (bb_vinfo)->aux = NULL;
2288   free (bb_vinfo);
2289 }
2290
2291
2292 /* Analyze statements contained in SLP tree node after recursively analyzing
2293    the subtree. Return TRUE if the operations are supported.  */
2294
2295 static bool
2296 vect_slp_analyze_node_operations (slp_tree node)
2297 {
2298   bool dummy;
2299   int i, j;
2300   gimple *stmt;
2301   slp_tree child;
2302
2303   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2304     return true;
2305
2306   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2307     if (!vect_slp_analyze_node_operations (child))
2308       return false;
2309
2310   bool res = true;
2311   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2312     {
2313       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2314       gcc_assert (stmt_info);
2315       gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2316
2317       /* Push SLP node def-type to stmt operands.  */
2318       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2319         if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2320           STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2321             = SLP_TREE_DEF_TYPE (child);
2322       res = vect_analyze_stmt (stmt, &dummy, node);
2323       /* Restore def-types.  */
2324       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2325         if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2326           STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2327             = vect_internal_def;
2328       if (! res)
2329         break;
2330     }
2331
2332   return res;
2333 }
2334
2335
2336 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
2337    operations are supported. */
2338
2339 bool
2340 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2341 {
2342   slp_instance instance;
2343   int i;
2344
2345   if (dump_enabled_p ())
2346     dump_printf_loc (MSG_NOTE, vect_location,
2347                      "=== vect_slp_analyze_operations ===\n");
2348
2349   for (i = 0; slp_instances.iterate (i, &instance); )
2350     {
2351       if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2352         {
2353           dump_printf_loc (MSG_NOTE, vect_location,
2354                            "removing SLP instance operations starting from: ");
2355           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2356                             SLP_TREE_SCALAR_STMTS
2357                               (SLP_INSTANCE_TREE (instance))[0], 0);
2358           vect_free_slp_instance (instance);
2359           slp_instances.ordered_remove (i);
2360         }
2361       else
2362         {
2363           /* Compute the costs of the SLP instance.  */
2364           vect_analyze_slp_cost (instance, data);
2365           i++;
2366         }
2367     }
2368
2369   if (!slp_instances.length ())
2370     return false;
2371
2372   return true;
2373 }
2374
2375
2376 /* Compute the scalar cost of the SLP node NODE and its children
2377    and return it.  Do not account for defs that are marked in LIFE, and
2378    update LIFE according to uses of NODE.  */
2379
2380 static unsigned
2381 vect_bb_slp_scalar_cost (basic_block bb,
2382                          slp_tree node, vec<bool, va_heap> *life)
2383 {
2384   unsigned scalar_cost = 0;
2385   unsigned i;
2386   gimple *stmt;
2387   slp_tree child;
2388
2389   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2390     {
2391       unsigned stmt_cost;
2392       ssa_op_iter op_iter;
2393       def_operand_p def_p;
2394       stmt_vec_info stmt_info;
2395
2396       if ((*life)[i])
2397         continue;
2398
2399       /* If there is a non-vectorized use of the defs then the scalar
2400          stmt is kept live in which case we do not account it or any
2401          required defs in the SLP children in the scalar cost.  This
2402          way we make the vectorization more costly when compared to
2403          the scalar cost.  */
2404       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2405         {
2406           imm_use_iterator use_iter;
2407           gimple *use_stmt;
2408           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2409             if (!is_gimple_debug (use_stmt)
2410                 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2411                                              use_stmt)
2412                     || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2413               {
2414                 (*life)[i] = true;
2415                 BREAK_FROM_IMM_USE_STMT (use_iter);
2416               }
2417         }
2418       if ((*life)[i])
2419         continue;
2420
2421       /* Count scalar stmts only once.  */
2422       if (gimple_visited_p (stmt))
2423         continue;
2424       gimple_set_visited (stmt, true);
2425
2426       stmt_info = vinfo_for_stmt (stmt);
2427       if (STMT_VINFO_DATA_REF (stmt_info))
2428         {
2429           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2430             stmt_cost = vect_get_stmt_cost (scalar_load);
2431           else
2432             stmt_cost = vect_get_stmt_cost (scalar_store);
2433         }
2434       else
2435         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2436
2437       scalar_cost += stmt_cost;
2438     }
2439
2440   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2441     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2442       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2443
2444   return scalar_cost;
2445 }
2446
2447 /* Check if vectorization of the basic block is profitable.  */
2448
2449 static bool
2450 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2451 {
2452   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2453   slp_instance instance;
2454   int i;
2455   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2456   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2457
2458   /* Calculate scalar cost.  */
2459   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2460     {
2461       auto_vec<bool, 20> life;
2462       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2463       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2464                                               SLP_INSTANCE_TREE (instance),
2465                                               &life);
2466     }
2467
2468   /* Unset visited flag.  */
2469   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2470        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2471     gimple_set_visited  (gsi_stmt (gsi), false);
2472
2473   /* Complete the target-specific cost calculation.  */
2474   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2475                &vec_inside_cost, &vec_epilogue_cost);
2476
2477   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2478
2479   if (dump_enabled_p ())
2480     {
2481       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2482       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2483                    vec_inside_cost);
2484       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2485       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2486       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2487     }
2488
2489   /* Vectorization is profitable if its cost is no more than the cost of the
2490      scalar version.  Note that we err on the vector side for equal cost because
2491      the cost estimate is otherwise quite pessimistic (constant uses are
2492      free on the scalar side but cost a load on the vector side for
2493      example).  */
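  /* For example, a scalar cost of 12 against vector prologue, inside and
     epilogue costs of 2, 8 and 1 gives 11 <= 12, so the block is
     considered profitable to vectorize.  */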
2494   if (vec_outside_cost + vec_inside_cost > scalar_cost)
2495     return false;
2496
2497   return true;
2498 }
2499
2500 /* Check if the basic block can be vectorized.  Return a bb_vec_info
2501    if so, and set FATAL to true if failure is independent of
2502    current_vector_size.  */
2503
2504 static bb_vec_info
2505 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2506                        gimple_stmt_iterator region_end,
2507                        vec<data_reference_p> datarefs, int n_stmts,
2508                        bool &fatal)
2509 {
2510   bb_vec_info bb_vinfo;
2511   slp_instance instance;
2512   int i;
2513   int min_vf = 2;
2514
2515   /* The first group of checks is independent of the vector size.  */
2516   fatal = true;
2517
2518   if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2519     {
2520       if (dump_enabled_p ())
2521         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2522                          "not vectorized: too many instructions in "
2523                          "basic block.\n");
2524       free_data_refs (datarefs);
2525       return NULL;
2526     }
2527
2528   bb_vinfo = new_bb_vec_info (region_begin, region_end);
2529   if (!bb_vinfo)
2530     return NULL;
2531
2532   BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2533
2534   /* Analyze the data references.  */
2535
2536   if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2537     {
2538       if (dump_enabled_p ())
2539         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540                          "not vectorized: unhandled data-ref in basic "
2541                          "block.\n");
2542
2543       destroy_bb_vec_info (bb_vinfo);
2544       return NULL;
2545     }
2546
2547   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2548     {
2549       if (dump_enabled_p ())
2550         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551                          "not vectorized: not enough data-refs in "
2552                          "basic block.\n");
2553
2554       destroy_bb_vec_info (bb_vinfo);
2555       return NULL;
2556     }
2557
2558   if (!vect_analyze_data_ref_accesses (bb_vinfo))
2559     {
2560      if (dump_enabled_p ())
2561        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2562                         "not vectorized: unhandled data access in "
2563                         "basic block.\n");
2564
2565       destroy_bb_vec_info (bb_vinfo);
2566       return NULL;
2567     }
2568
2569   /* If there are no grouped stores in the region there is no need
2570      to continue with pattern recog as vect_analyze_slp will fail
2571      anyway.  */
2572   if (bb_vinfo->grouped_stores.is_empty ())
2573     {
2574       if (dump_enabled_p ())
2575         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2576                          "not vectorized: no grouped stores in "
2577                          "basic block.\n");
2578
2579       destroy_bb_vec_info (bb_vinfo);
2580       return NULL;
2581     }
2582
2583   /* The rest of the analysis below depends on the vector size in some way.  */
2584   fatal = false;
2585
2586   vect_pattern_recog (bb_vinfo);
2587
2588   /* Check the SLP opportunities in the basic block, analyze and build SLP
2589      trees.  */
2590   if (!vect_analyze_slp (bb_vinfo, n_stmts))
2591     {
2592       if (dump_enabled_p ())
2593         {
2594           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2595                            "Failed to SLP the basic block.\n");
2596           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2597                            "not vectorized: failed to find SLP opportunities "
2598                            "in basic block.\n");
2599         }
2600
2601       destroy_bb_vec_info (bb_vinfo);
2602       return NULL;
2603     }
2604
2605   /* Analyze and verify the alignment of data references and the
2606      dependence in the SLP instances.  */
2607   for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2608     {
2609       if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2610           || ! vect_slp_analyze_instance_dependence (instance))
2611         {
2612           dump_printf_loc (MSG_NOTE, vect_location,
2613                            "removing SLP instance operations starting from: ");
2614           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2615                             SLP_TREE_SCALAR_STMTS
2616                               (SLP_INSTANCE_TREE (instance))[0], 0);
2617           vect_free_slp_instance (instance);
2618           BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2619           continue;
2620         }
2621
2622       /* Mark all the statements that we want to vectorize as pure SLP and
2623          relevant.  */
2624       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2625       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2626
2627       i++;
2628     }
2629   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2630     {
2631       destroy_bb_vec_info (bb_vinfo);
2632       return NULL;
2633     }
2634
2635   if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2636                                     BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2637     {
2638       if (dump_enabled_p ())
2639         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2640                          "not vectorized: bad operation in basic block.\n");
2641
2642       destroy_bb_vec_info (bb_vinfo);
2643       return NULL;
2644     }
2645
2646   /* Cost model: check if the vectorization is worthwhile.  */
2647   if (!unlimited_cost_model (NULL)
2648       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2649     {
2650       if (dump_enabled_p ())
2651         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2652                          "not vectorized: vectorization is not "
2653                          "profitable.\n");
2654
2655       destroy_bb_vec_info (bb_vinfo);
2656       return NULL;
2657     }
2658
2659   if (dump_enabled_p ())
2660     dump_printf_loc (MSG_NOTE, vect_location,
2661                      "Basic block will be vectorized using SLP\n");
2662
2663   return bb_vinfo;
2664 }
2665
2666
2667 /* Main entry for the BB vectorizer.  Analyze and transform BB; return
2668    true if anything in the basic block was vectorized.  */
2669
2670 bool
2671 vect_slp_bb (basic_block bb)
2672 {
2673   bb_vec_info bb_vinfo;
2674   gimple_stmt_iterator gsi;
2675   unsigned int vector_sizes;
2676   bool any_vectorized = false;
2677
2678   if (dump_enabled_p ())
2679     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2680
2681   /* Autodetect the first vector size we try.  */
2682   current_vector_size = 0;
2683   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2684
2685   gsi = gsi_start_bb (bb);
2686
2687   while (1)
2688     {
2689       if (gsi_end_p (gsi))
2690         break;
2691
2692       gimple_stmt_iterator region_begin = gsi;
2693       vec<data_reference_p> datarefs = vNULL;
2694       int insns = 0;
2695
2696       for (; !gsi_end_p (gsi); gsi_next (&gsi))
2697         {
2698           gimple *stmt = gsi_stmt (gsi);
2699           if (is_gimple_debug (stmt))
2700             continue;
2701           insns++;
2702
2703           if (gimple_location (stmt) != UNKNOWN_LOCATION)
2704             vect_location = gimple_location (stmt);
2705
2706           if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2707             break;
2708         }
2709
2710       /* Skip leading unhandled stmts.  */
2711       if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2712         {
2713           gsi_next (&gsi);
2714           continue;
2715         }
2716
2717       gimple_stmt_iterator region_end = gsi;
2718
2719       bool vectorized = false;
2720       bool fatal = false;
2721       bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
2722                                         datarefs, insns, fatal);
2723       if (bb_vinfo
2724           && dbg_cnt (vect_slp))
2725         {
2726           if (dump_enabled_p ())
2727             dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
2728
2729           vect_schedule_slp (bb_vinfo);
2730
2731           if (dump_enabled_p ())
2732             dump_printf_loc (MSG_NOTE, vect_location,
2733                              "basic block part vectorized\n");
2734
2735           destroy_bb_vec_info (bb_vinfo);
2736
2737           vectorized = true;
2738         }
2739       else
2740         destroy_bb_vec_info (bb_vinfo);
2741
2742       any_vectorized |= vectorized;
2743
2744       vector_sizes &= ~current_vector_size;
2745       if (vectorized
2746           || vector_sizes == 0
2747           || current_vector_size == 0
2748           /* If vect_slp_analyze_bb_1 signaled that analysis for all
2749              vector sizes will fail do not bother iterating.  */
2750           || fatal)
2751         {
2752           if (gsi_end_p (region_end))
2753             break;
2754
2755           /* Skip the unhandled stmt.  */
2756           gsi_next (&gsi);
2757
2758           /* And reset vector sizes.  */
2759           current_vector_size = 0;
2760           vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2761         }
2762       else
2763         {
2764           /* Try the next biggest vector size.  */
2765           current_vector_size = 1 << floor_log2 (vector_sizes);
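          /* For example, if the target advertises sizes 32, 16 and 8 and
             the 32-byte analysis just failed, vector_sizes now holds
             16 | 8 and we re-try with size 16.  */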
2766           if (dump_enabled_p ())
2767             dump_printf_loc (MSG_NOTE, vect_location,
2768                              "***** Re-trying analysis with "
2769                              "vector size %d\n", current_vector_size);
2770
2771           /* Start over.  */
2772           gsi = region_begin;
2773         }
2774     }
2775
2776   return any_vectorized;
2777 }
2778
2779
2780 /* Return true if the vector type of the boolean constant that is operand
2781    number OPNUM of statement STMT is a boolean vector.  */
2782
2783 static bool
2784 vect_mask_constant_operand_p (gimple *stmt, int opnum)
2785 {
2786   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2787   enum tree_code code = gimple_expr_code (stmt);
2788   tree op, vectype;
2789   gimple *def_stmt;
2790   enum vect_def_type dt;
2791
2792   /* For comparisons and COND_EXPR the type is chosen depending
2793      on the other comparison operand.  */
2794   if (TREE_CODE_CLASS (code) == tcc_comparison)
2795     {
2796       if (opnum)
2797         op = gimple_assign_rhs1 (stmt);
2798       else
2799         op = gimple_assign_rhs2 (stmt);
2800
2801       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2802                                &dt, &vectype))
2803         gcc_unreachable ();
2804
2805       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2806     }
2807
2808   if (code == COND_EXPR)
2809     {
2810       tree cond = gimple_assign_rhs1 (stmt);
2811
2812       if (TREE_CODE (cond) == SSA_NAME)
2813         return false;
2814
2815       if (opnum)
2816         op = TREE_OPERAND (cond, 1);
2817       else
2818         op = TREE_OPERAND (cond, 0);
2819
2820       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2821                                &dt, &vectype))
2822         gcc_unreachable ();
2823
2824       return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2825     }
2826
2827   return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
2828 }
2829
2830
2831 /* For constant and loop invariant defs of SLP_NODE this function returns
2832    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2833    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2834    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2835    REDUC_INDEX is the index of the reduction operand in the statements, unless
2836    it is -1.  */
2837
2838 static void
2839 vect_get_constant_vectors (tree op, slp_tree slp_node,
2840                            vec<tree> *vec_oprnds,
2841                            unsigned int op_num, unsigned int number_of_vectors,
2842                            int reduc_index)
2843 {
2844   vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2845   gimple *stmt = stmts[0];
2846   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2847   unsigned nunits;
2848   tree vec_cst;
2849   tree *elts;
2850   unsigned j, number_of_places_left_in_vector;
2851   tree vector_type;
2852   tree vop;
2853   int group_size = stmts.length ();
2854   unsigned int vec_num, i;
2855   unsigned number_of_copies = 1;
2856   vec<tree> voprnds;
2857   voprnds.create (number_of_vectors);
2858   bool constant_p, is_store;
2859   tree neutral_op = NULL;
2860   enum tree_code code = gimple_expr_code (stmt);
2861   gimple *def_stmt;
2862   struct loop *loop;
2863   gimple_seq ctor_seq = NULL;
2864
2865   /* Check if vector type is a boolean vector.  */
2866   if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
2867       && vect_mask_constant_operand_p (stmt, op_num))
2868     vector_type
2869       = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
2870   else
2871     vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2872   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2873
2874   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2875       && reduc_index != -1)
2876     {
2877       op_num = reduc_index;
2878       op = gimple_op (stmt, op_num + 1);
2879       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2880          we need either neutral operands or the original operands.  See
2881          get_initial_def_for_reduction() for details.  */
2882       switch (code)
2883         {
2884           case WIDEN_SUM_EXPR:
2885           case DOT_PROD_EXPR:
2886           case SAD_EXPR:
2887           case PLUS_EXPR:
2888           case MINUS_EXPR:
2889           case BIT_IOR_EXPR:
2890           case BIT_XOR_EXPR:
2891              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2892                neutral_op = build_real (TREE_TYPE (op), dconst0);
2893              else
2894                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2895
2896              break;
2897
2898           case MULT_EXPR:
2899              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2900                neutral_op = build_real (TREE_TYPE (op), dconst1);
2901              else
2902                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2903
2904              break;
2905
2906           case BIT_AND_EXPR:
2907             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2908             break;
2909
2910           /* For MIN/MAX we don't have an easy neutral operand but
2911              the initial values can be used fine here.  Only for
2912              a reduction chain do we have to force a neutral element.  */
2913           case MAX_EXPR:
2914           case MIN_EXPR:
2915             if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2916               neutral_op = NULL;
2917             else
2918               {
2919                 def_stmt = SSA_NAME_DEF_STMT (op);
2920                 loop = (gimple_bb (stmt))->loop_father;
2921                 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2922                                                     loop_preheader_edge (loop));
2923               }
2924             break;
2925
2926           default:
2927             gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2928             neutral_op = NULL;
2929         }
2930     }
2931
2932   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2933     {
2934       is_store = true;
2935       op = gimple_assign_rhs1 (stmt);
2936     }
2937   else
2938     is_store = false;
2939
2940   gcc_assert (op);
2941
2942   if (CONSTANT_CLASS_P (op))
2943     constant_p = true;
2944   else
2945     constant_p = false;
2946
2947   /* NUMBER_OF_COPIES is the number of times we need to use the same values
2948      in the created vectors.  It is greater than 1 if unrolling is performed.
2949
2950      For example, we have two scalar operands, s1 and s2 (e.g., group of
2951      strided accesses of size two), while NUNITS is four (i.e., four scalars
2952      of this type can be packed in a vector).  The output vector will contain
2953      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2954      will be 2).
2955
2956      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2957      containing the operands.
2958
2959      For example, NUNITS is four as before, and the group size is 8
2960      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2961      {s5, s6, s7, s8}.  */
2962
2963   number_of_copies = nunits * number_of_vectors / group_size;
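  /* A worked instance of this formula, using the illustrative values from
     the comment above: NUNITS == 4, GROUP_SIZE == 2 and a single vector to
     fill give 4 * 1 / 2 == 2 copies of {s1, s2}; NUNITS == 4,
     GROUP_SIZE == 8 and two vectors give 4 * 2 / 8 == 1 copy each.  */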
2964
2965   number_of_places_left_in_vector = nunits;
2966   elts = XALLOCAVEC (tree, nunits);
2967   bool place_after_defs = false;
2968   for (j = 0; j < number_of_copies; j++)
2969     {
2970       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2971         {
2972           if (is_store)
2973             op = gimple_assign_rhs1 (stmt);
2974           else
2975             {
2976               switch (code)
2977                 {
2978                   case COND_EXPR:
2979                     {
2980                       tree cond = gimple_assign_rhs1 (stmt);
2981                       if (TREE_CODE (cond) == SSA_NAME)
2982                         op = gimple_op (stmt, op_num + 1);
2983                       else if (op_num == 0 || op_num == 1)
2984                         op = TREE_OPERAND (cond, op_num);
2985                       else
2986                         {
2987                           if (op_num == 2)
2988                             op = gimple_assign_rhs2 (stmt);
2989                           else
2990                             op = gimple_assign_rhs3 (stmt);
2991                         }
2992                     }
2993                     break;
2994
2995                   case CALL_EXPR:
2996                     op = gimple_call_arg (stmt, op_num);
2997                     break;
2998
2999                   case LSHIFT_EXPR:
3000                   case RSHIFT_EXPR:
3001                   case LROTATE_EXPR:
3002                   case RROTATE_EXPR:
3003                     op = gimple_op (stmt, op_num + 1);
3004                     /* Unlike the other binary operators, shifts/rotates
3005                        take a shift count of type int rather than one of
3006                        the same type as the lhs, so make sure the scalar
3007                        has the right type if we are dealing with vectors
3008                        of long long/long/short/char.  */
3009                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3010                       op = fold_convert (TREE_TYPE (vector_type), op);
3011                     break;
3012
3013                   default:
3014                     op = gimple_op (stmt, op_num + 1);
3015                     break;
3016                 }
3017             }
3018
3019           if (reduc_index != -1)
3020             {
3021               loop = (gimple_bb (stmt))->loop_father;
3022               def_stmt = SSA_NAME_DEF_STMT (op);
3023
3024               gcc_assert (loop);
3025
3026               /* Get the def before the loop.  In a reduction chain we
3027                  have only one initial value.  */
3028               if ((j != (number_of_copies - 1)
3029                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
3030                        && i != 0))
3031                   && neutral_op)
3032                 op = neutral_op;
3033               else
3034                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
3035                                             loop_preheader_edge (loop));
3036             }
3037
3038           /* Create 'vect_ = {op0,op1,...,opn}'.  */
3039           number_of_places_left_in_vector--;
3040           tree orig_op = op;
3041           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3042             {
3043               if (CONSTANT_CLASS_P (op))
3044                 {
3045                   if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3046                     {
3047                       /* Can't use VIEW_CONVERT_EXPR for booleans because
3048                          of possibly different sizes of scalar value and
3049                          vector element.  */
3050                       if (integer_zerop (op))
3051                         op = build_int_cst (TREE_TYPE (vector_type), 0);
3052                       else if (integer_onep (op))
3053                         op = build_all_ones_cst (TREE_TYPE (vector_type));
3054                       else
3055                         gcc_unreachable ();
3056                     }
3057                   else
3058                     op = fold_unary (VIEW_CONVERT_EXPR,
3059                                      TREE_TYPE (vector_type), op);
3060                   gcc_assert (op && CONSTANT_CLASS_P (op));
3061                 }
3062               else
3063                 {
3064                   tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3065                   gimple *init_stmt;
3066                   if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3067                     {
3068                       tree true_val
3069                         = build_all_ones_cst (TREE_TYPE (vector_type));
3070                       tree false_val
3071                         = build_zero_cst (TREE_TYPE (vector_type));
3072                       gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3073                       init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3074                                                        op, true_val,
3075                                                        false_val);
3076                     }
3077                   else
3078                     {
3079                       op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3080                                    op);
3081                       init_stmt
3082                         = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3083                                                op);
3084                     }
3085                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
3086                   op = new_temp;
3087                 }
3088             }
3089           elts[number_of_places_left_in_vector] = op;
3090           if (!CONSTANT_CLASS_P (op))
3091             constant_p = false;
3092           if (TREE_CODE (orig_op) == SSA_NAME
3093               && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3094               && STMT_VINFO_BB_VINFO (stmt_vinfo)
3095               && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3096                   == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3097             place_after_defs = true;
3098
3099           if (number_of_places_left_in_vector == 0)
3100             {
3101               number_of_places_left_in_vector = nunits;
3102
3103               if (constant_p)
3104                 vec_cst = build_vector (vector_type, elts);
3105               else
3106                 {
3107                   vec<constructor_elt, va_gc> *v;
3108                   unsigned k;
3109                   vec_alloc (v, nunits);
3110                   for (k = 0; k < nunits; ++k)
3111                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3112                   vec_cst = build_constructor (vector_type, v);
3113                 }
3114               tree init;
3115               gimple_stmt_iterator gsi;
3116               if (place_after_defs)
3117                 {
3118                   gsi = gsi_for_stmt
3119                           (vect_find_last_scalar_stmt_in_slp (slp_node));
3120                   init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3121                 }
3122               else
3123                 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3124               if (ctor_seq != NULL)
3125                 {
3126                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3127                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3128                                                         GSI_SAME_STMT);
3129                   ctor_seq = NULL;
3130                 }
3131               voprnds.quick_push (init);
3132               place_after_defs = false;
3133             }
3134         }
3135     }
3136
3137   /* Since the vectors are created in the reverse order, we have to reverse
3138      them here.  */
3139   vec_num = voprnds.length ();
3140   for (j = vec_num; j != 0; j--)
3141     {
3142       vop = voprnds[j - 1];
3143       vec_oprnds->quick_push (vop);
3144     }
3145
3146   voprnds.release ();
3147
3148   /* If VF is greater than the unrolling factor needed for the SLP group
3149      of stmts, the NUMBER_OF_VECTORS to be created is greater than
3150      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we
3151      have to replicate the vectors.  */
3152   while (number_of_vectors > vec_oprnds->length ())
3153     {
3154       tree neutral_vec = NULL;
3155
3156       if (neutral_op)
3157         {
3158           if (!neutral_vec)
3159             neutral_vec = build_vector_from_val (vector_type, neutral_op);
3160
3161           vec_oprnds->quick_push (neutral_vec);
3162         }
3163       else
3164         {
3165           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3166             vec_oprnds->quick_push (vop);
3167         }
3168     }
3169 }
3170
3171
3172 /* Get the vectorized definitions from SLP_NODE, which contains the
3173    corresponding vectorized def-stmts.  */
3174
3175 static void
3176 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3177 {
3178   tree vec_oprnd;
3179   gimple *vec_def_stmt;
3180   unsigned int i;
3181
3182   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3183
3184   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3185     {
3186       gcc_assert (vec_def_stmt);
3187       vec_oprnd = gimple_get_lhs (vec_def_stmt);
3188       vec_oprnds->quick_push (vec_oprnd);
3189     }
3190 }
3191
3192
3193 /* Get vectorized definitions for SLP_NODE.
3194    If the scalar definitions are loop invariants or constants, collect them and
3195    call vect_get_constant_vectors() to create vector stmts.
3196    Otherwise, the def-stmts must already be vectorized and the vectorized
3197    stmts stored in the corresponding child of SLP_NODE, in which case we call
3198    vect_get_slp_vect_defs () to retrieve them.  */
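/* A minimal caller sketch (illustrative; the names and setup here are
   assumptions, not a quote from an actual caller):

     auto_vec<vec<tree> > vec_defs (ops.length ());
     vect_get_slp_defs (ops, slp_node, &vec_defs, -1);

   after which vec_defs[i] holds the vector defs created for operand
   OPS[i].  */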
3199
3200 void
3201 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3202                    vec<vec<tree> > *vec_oprnds, int reduc_index)
3203 {
3204   gimple *first_stmt;
3205   int number_of_vects = 0, i;
3206   unsigned int child_index = 0;
3207   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3208   slp_tree child = NULL;
3209   vec<tree> vec_defs;
3210   tree oprnd;
3211   bool vectorized_defs;
3212
3213   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3214   FOR_EACH_VEC_ELT (ops, i, oprnd)
3215     {
3216       /* For each operand we check if it has vectorized definitions in a child
3217          node or we need to create them (for invariants and constants).  We
3218          check if the LHS of the first stmt of the next child matches OPRND.
3219          If it does, we found the correct child.  Otherwise, we call
3220          vect_get_constant_vectors () and do not advance CHILD_INDEX, so
3221          that this child node is checked again for the next operand.  */
3222       vectorized_defs = false;
3223       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3224         {
3225           child = SLP_TREE_CHILDREN (slp_node)[child_index];
3226
3227           /* We have to check both pattern and original def, if available.  */
3228           if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3229             {
3230               gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3231               gimple *related
3232                 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3233
3234               if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
3235                   || (related
3236                       && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3237                 {
3238                   /* The number of vector defs is determined by the number of
3239                      vector statements in the node from which we get those
3240                      statements.  */
3241                   number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3242                   vectorized_defs = true;
3243                   child_index++;
3244                 }
3245             }
3246           else
3247             child_index++;
3248         }
3249
3250       if (!vectorized_defs)
3251         {
3252           if (i == 0)
3253             {
3254               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3255               /* Number of vector stmts was calculated according to LHS in
3256                  vect_schedule_slp_instance (), fix it by replacing LHS with
3257                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
3258                  details.  */
3259               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3260                                              &rhs_size_unit);
3261               if (rhs_size_unit != lhs_size_unit)
3262                 {
3263                   number_of_vects *= rhs_size_unit;
3264                   number_of_vects /= lhs_size_unit;
3265                 }
3266             }
3267         }
3268
3269       /* Allocate memory for vectorized defs.  */
3270       vec_defs = vNULL;
3271       vec_defs.create (number_of_vects);
3272
3273       /* For reduction defs we call vect_get_constant_vectors (), since we are
3274          looking for initial loop invariant values.  */
3275       if (vectorized_defs && reduc_index == -1)
3276         /* The defs are already vectorized.  */
3277         vect_get_slp_vect_defs (child, &vec_defs);
3278       else
3279         /* Build vectors from scalar defs.  */
3280         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3281                                    number_of_vects, reduc_index);
3282
3283       vec_oprnds->quick_push (vec_defs);
3284
3285       /* For reductions, we only need initial values.  */
3286       if (reduc_index != -1)
3287         return;
3288     }
3289 }
3290
3291
3292 /* Create NCOPIES permutation statements using the vector permute mask MASK
3293    (or the requested identity transform if MASK is NULL_TREE) and two input
3294    vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the
3295    first copy, advancing both indices by STRIDE elements of DR_CHAIN for
3296    every further copy.
3297    (STRIDE is the number of DR_CHAIN entries divided by NCOPIES.)
3298    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
3299    where the created stmts must be inserted.  */
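/* As an illustration (hypothetical numbers, not taken from the code): with
   NCOPIES == 2, a DR_CHAIN of four vectors, FIRST_VEC_INDX == 0 and
   SECOND_VEC_INDX == 1, both indices advance by 4 / 2 == 2 per copy, so
   copy 0 permutes DR_CHAIN[0] with DR_CHAIN[1] and copy 1 permutes
   DR_CHAIN[2] with DR_CHAIN[3].  */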
3300
3301 static inline void
3302 vect_create_mask_and_perm (gimple *stmt,
3303                            tree mask, int first_vec_indx, int second_vec_indx,
3304                            gimple_stmt_iterator *gsi, slp_tree node,
3305                            tree vectype, vec<tree> dr_chain,
3306                            int ncopies, int vect_stmts_counter)
3307 {
3308   tree perm_dest;
3309   gimple *perm_stmt = NULL;
3310   int i, stride_in, stride_out;
3311   tree first_vec, second_vec, data_ref;
3312
3313   stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3314   stride_in = dr_chain.length () / ncopies;
3315
3316   /* Initialize the vect stmts of NODE to properly insert the generated
3317      stmts later.  */
3318   for (i = SLP_TREE_VEC_STMTS (node).length ();
3319        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3320     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3321
3322   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3323   for (i = 0; i < ncopies; i++)
3324     {
3325       first_vec = dr_chain[first_vec_indx];
3326       second_vec = dr_chain[second_vec_indx];
3327
3328       /* Generate the permute statement if necessary.  */
3329       if (mask)
3330         {
3331           perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3332                                            first_vec, second_vec, mask);
3333           data_ref = make_ssa_name (perm_dest, perm_stmt);
3334           gimple_set_lhs (perm_stmt, data_ref);
3335           vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3336         }
3337       else
3338         /* If mask was NULL_TREE generate the requested identity transform.  */
3339         perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3340
3341       /* Store the vector statement in NODE.  */
3342       SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
3343         = perm_stmt;
3344
3345       first_vec_indx += stride_in;
3346       second_vec_indx += stride_in;
3347     }
3348 }
3349
3350
3351 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3352    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3353    permute statements for the SLP node NODE of the SLP instance
3354    SLP_NODE_INSTANCE.  */
3355
3356 bool
3357 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3358                               gimple_stmt_iterator *gsi, int vf,
3359                               slp_instance slp_node_instance, bool analyze_only)
3360 {
3361   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3362   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3363   tree mask_element_type = NULL_TREE, mask_type;
3364   int nunits, vec_index = 0;
3365   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3366   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3367   int unroll_factor, mask_element, ncopies;
3368   unsigned char *mask;
3369   machine_mode mode;
3370
3371   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3372     return false;
3373
3374   stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3375
3376   mode = TYPE_MODE (vectype);
3377
3378   /* The generic VEC_PERM_EXPR code always uses an integral type of the
3379      same size as the vector element being permuted.  */
3380   mask_element_type = lang_hooks.types.type_for_mode
3381                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3382   mask_type = get_vectype_for_scalar_type (mask_element_type);
3383   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3384   mask = XALLOCAVEC (unsigned char, nunits);
3385   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3386
3387   /* The number of copies is determined by the final vectorization factor
3388      relative to the SLP_NODE_INSTANCE unrolling factor.  */
3389   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
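  /* For example (illustrative values): with VF == 8 and an SLP unrolling
     factor of 4, every permutation computed below is emitted
     NCOPIES == 2 times.  */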
3390
3391   /* Generate permutation masks for every NODE.  The number of masks for
3392      each NODE is equal to GROUP_SIZE.
3393      E.g., we have a group of three nodes with three loads from the same
3394      location in each node, and the vector size is 4.  I.e., we have an
3395      a0b0c0a1b1c1... sequence and we need to create the following vectors:
3396      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3397      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3398      ...
3399
3400      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3401      The last mask is illegal since we assume two operands for the permute
3402      operation, so mask element values must be smaller than 2 * NUNITS.
3403      Hence, the last mask must be converted into {2,5,5,5}.
3404      For the first two permutations we need the first and the second input
3405      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3406      we need the second and the third vectors: {b1,c1,a2,b2} and
3407      {c2,a3,b3,c3}.  */
3408
3409   int vect_stmts_counter = 0;
3410   int index = 0;
3411   int first_vec_index = -1;
3412   int second_vec_index = -1;
3413   bool noop_p = true;
3414
3415   for (int j = 0; j < unroll_factor; j++)
3416     {
3417       for (int k = 0; k < group_size; k++)
3418         {
3419           int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3420                    + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3421           vec_index = i / nunits;
3422           mask_element = i % nunits;
3423           if (vec_index == first_vec_index
3424               || first_vec_index == -1)
3425             {
3426               first_vec_index = vec_index;
3427             }
3428           else if (vec_index == second_vec_index
3429                    || second_vec_index == -1)
3430             {
3431               second_vec_index = vec_index;
3432               mask_element += nunits;
3433             }
3434           else
3435             {
3436               if (dump_enabled_p ())
3437                 {
3438                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3439                                    "permutation requires at "
3440                                    "least three vectors ");
3441                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3442                                     stmt, 0);
3443                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3444                 }
3445               return false;
3446             }
3447
3448           gcc_assert (mask_element >= 0
3449                       && mask_element < 2 * nunits);
3450           if (mask_element != index)
3451             noop_p = false;
3452           mask[index++] = mask_element;
3453
3454           if (index == nunits)
3455             {
3456               if (! noop_p
3457                   && ! can_vec_perm_p (mode, false, mask))
3458                 {
3459                   if (dump_enabled_p ())
3460                     {
3461                       dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3462                                        vect_location,
3463                                        "unsupported vect permute { ");
3464                       for (i = 0; i < nunits; ++i)
3465                         dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3466                       dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3467                     }
3468                   return false;
3469                 }
3470
3471               if (!analyze_only)
3472                 {
3473                   tree mask_vec = NULL_TREE;
3474
3475                   if (! noop_p)
3476                     {
3477                       tree *mask_elts = XALLOCAVEC (tree, nunits);
3478                       for (int l = 0; l < nunits; ++l)
3479                         mask_elts[l] = build_int_cst (mask_element_type,
3480                                                       mask[l]);
3481                       mask_vec = build_vector (mask_type, mask_elts);
3482                     }
3483
3484                   if (second_vec_index == -1)
3485                     second_vec_index = first_vec_index;
3486                   vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
3487                                              second_vec_index,
3488                                              gsi, node, vectype, dr_chain,
3489                                              ncopies, vect_stmts_counter++);
3490                 }
3491
3492               index = 0;
3493               first_vec_index = -1;
3494               second_vec_index = -1;
3495               noop_p = true;
3496             }
3497         }
3498     }
3499
3500   return true;
3501 }
3502
3503
3504
3505 /* Vectorize SLP instance tree in postorder.  */
3506
3507 static bool
3508 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3509                             unsigned int vectorization_factor)
3510 {
3511   gimple *stmt;
3512   bool grouped_store, is_store;
3513   gimple_stmt_iterator si;
3514   stmt_vec_info stmt_info;
3515   unsigned int vec_stmts_size, nunits, group_size;
3516   tree vectype;
3517   int i, j;
3518   slp_tree child;
3519
3520   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3521     return false;
3522
3523   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3524     vect_schedule_slp_instance (child, instance, vectorization_factor);
3525
3526   /* Push SLP node def-type to stmts.  */
3527   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3528     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3529       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3530         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3531
3532   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3533   stmt_info = vinfo_for_stmt (stmt);
3534
3535   /* VECTYPE is the type of the destination.  */
3536   vectype = STMT_VINFO_VECTYPE (stmt_info);
3537   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3538   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3539
3540   /* For each SLP instance calculate the number of vector stmts to be
3541      created for the scalar stmts in each node of the SLP tree.  This is
3542      the number of scalar elements in one scalar iteration (GROUP_SIZE)
3543      multiplied by VF and divided by the number of elements in a vector
3544      (NUNITS).
3545      Unless this is an SLP reduction, in which case the number of vector
3546      stmts is equal to the number of vector stmts of the children.  */
3547   if (GROUP_FIRST_ELEMENT (stmt_info)
3548       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3549     vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3550   else
3551     vec_stmts_size = (vectorization_factor * group_size) / nunits;
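  /* E.g. (illustrative values): GROUP_SIZE == 2, VF == 4 and NUNITS == 4
     give 4 * 2 / 4 == 2 vector stmts per node.  */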
3552
3553   if (!SLP_TREE_VEC_STMTS (node).exists ())
3554     {
3555       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3556       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3557     }
3558
3559   if (dump_enabled_p ())
3560     {
3561       dump_printf_loc (MSG_NOTE, vect_location,
3562                        "------>vectorizing SLP node starting from: ");
3563       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3564       dump_printf (MSG_NOTE, "\n");
3565     }
3566
3567   /* Vectorized stmts go before the last scalar stmt, which is where
3568      all uses are ready.  */
3569   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3570
3571   /* Mark the first element of the reduction chain as reduction to properly
3572      transform the node.  In the analysis phase only the last element of the
3573      chain is marked as reduction.  */
3574   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3575       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3576     {
3577       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3578       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3579     }
3580
3581   /* Handle two-operation SLP nodes by vectorizing the group with
3582      both operations and then performing a merge.  */
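  /* For instance (a hypothetical group, values invented): vectorizing
     { a0+b0, a1-b1, a2+b2, a3-b3 } with nunits == 4 gives mask {0,1,0,1},
     so the merge mask built below is {0,5,2,7}: lanes 0 and 2 are taken
     from the all-PLUS vector and lanes 1 and 3 from the all-MINUS
     vector.  */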
3583   if (SLP_TREE_TWO_OPERATORS (node))
3584     {
3585       enum tree_code code0 = gimple_assign_rhs_code (stmt);
3586       enum tree_code ocode = ERROR_MARK;
3587       gimple *ostmt;
3588       unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3589       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3590         if (gimple_assign_rhs_code (ostmt) != code0)
3591           {
3592             mask[i] = 1;
3593             ocode = gimple_assign_rhs_code (ostmt);
3594           }
3595         else
3596           mask[i] = 0;
3597       if (ocode != ERROR_MARK)
3598         {
3599           vec<gimple *> v0;
3600           vec<gimple *> v1;
3601           unsigned j;
3602           tree tmask = NULL_TREE;
3603           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3604           v0 = SLP_TREE_VEC_STMTS (node).copy ();
3605           SLP_TREE_VEC_STMTS (node).truncate (0);
3606           gimple_assign_set_rhs_code (stmt, ocode);
3607           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3608           gimple_assign_set_rhs_code (stmt, code0);
3609           v1 = SLP_TREE_VEC_STMTS (node).copy ();
3610           SLP_TREE_VEC_STMTS (node).truncate (0);
3611           tree meltype = build_nonstandard_integer_type
3612               (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3613           tree mvectype = get_same_sized_vectype (meltype, vectype);
3614           unsigned k = 0, l;
3615           for (j = 0; j < v0.length (); ++j)
3616             {
3617               tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3618               for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3619                 {
3620                   if (k >= group_size)
3621                     k = 0;
3622                   melts[l] = build_int_cst
3623                       (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3624                 }
3625               tmask = build_vector (mvectype, melts);
3626
3627               /* ???  Not all targets support a VEC_PERM_EXPR with a
3628                  constant mask that would translate to a vec_merge RTX
3629                  (with their vec_perm_const_ok).  We can either not
3630                  vectorize in that case or let veclower do its job.
3631                  Unfortunately that isn't too great and at least for
3632                  plus/minus we'd eventually like to match targets
3633                  vector addsub instructions.  */
3634               gimple *vstmt;
3635               vstmt = gimple_build_assign (make_ssa_name (vectype),
3636                                            VEC_PERM_EXPR,
3637                                            gimple_assign_lhs (v0[j]),
3638                                            gimple_assign_lhs (v1[j]), tmask);
3639               vect_finish_stmt_generation (stmt, vstmt, &si);
3640               SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3641             }
3642           v0.release ();
3643           v1.release ();
3644           return false;
3645         }
3646     }
3647   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3648
3649   /* Restore stmt def-types.  */
3650   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3651     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3652       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3653         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3654
3655   return is_store;
3656 }
3657
3658 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3659    For loop vectorization this is done in vectorizable_call, but for SLP
3660    it needs to be deferred until end of vect_schedule_slp, because multiple
3661    SLP instances may refer to the same scalar stmt.  */
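/* For example (illustrative GIMPLE, names invented): a scalar call
     x_1 = foo (a_2);
   is rewritten to
     x_1 = 0;
   with its stmt_vec_info transferred to the new assignment.  */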
3662
3663 static void
3664 vect_remove_slp_scalar_calls (slp_tree node)
3665 {
3666   gimple *stmt, *new_stmt;
3667   gimple_stmt_iterator gsi;
3668   int i;
3669   slp_tree child;
3670   tree lhs;
3671   stmt_vec_info stmt_info;
3672
3673   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3674     return;
3675
3676   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3677     vect_remove_slp_scalar_calls (child);
3678
3679   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3680     {
3681       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3682         continue;
3683       stmt_info = vinfo_for_stmt (stmt);
3684       if (stmt_info == NULL
3685           || is_pattern_stmt_p (stmt_info)
3686           || !PURE_SLP_STMT (stmt_info))
3687         continue;
3688       lhs = gimple_call_lhs (stmt);
3689       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3690       set_vinfo_for_stmt (new_stmt, stmt_info);
3691       set_vinfo_for_stmt (stmt, NULL);
3692       STMT_VINFO_STMT (stmt_info) = new_stmt;
3693       gsi = gsi_for_stmt (stmt);
3694       gsi_replace (&gsi, new_stmt, false);
3695       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3696     }
3697 }
3698
3699 /* Generate vector code for all SLP instances in the loop/basic block.  */
3700
3701 bool
3702 vect_schedule_slp (vec_info *vinfo)
3703 {
3704   vec<slp_instance> slp_instances;
3705   slp_instance instance;
3706   unsigned int i, vf;
3707   bool is_store = false;
3708
3709   slp_instances = vinfo->slp_instances;
3710   if (is_a <loop_vec_info> (vinfo))
3711     vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
3712   else
3713     vf = 1;
3714
3715   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3716     {
3717       /* Schedule the tree of INSTANCE.  */
3718       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3719                                              instance, vf);
3720       if (dump_enabled_p ())
3721         dump_printf_loc (MSG_NOTE, vect_location,
3722                          "vectorizing stmts using SLP.\n");
3723     }
3724
3725   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3726     {
3727       slp_tree root = SLP_INSTANCE_TREE (instance);
3728       gimple *store;
3729       unsigned int j;
3730       gimple_stmt_iterator gsi;
3731
3732       /* Remove scalar call stmts.  Do not do this for basic-block
3733          vectorization as not all uses may be vectorized.
3734          ???  Why should this be necessary?  DCE should be able to
3735          remove the stmts itself.
3736          ???  For BB vectorization we can as well remove scalar
3737          stmts starting from the SLP tree root if they have no
3738          uses.  */
3739       if (is_a <loop_vec_info> (vinfo))
3740         vect_remove_slp_scalar_calls (root);
3741
3742       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3743                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3744         {
3745           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3746             break;
3747
3748           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3749             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3750           /* Free the attached stmt_vec_info and remove the stmt.  */
3751           gsi = gsi_for_stmt (store);
3752           unlink_stmt_vdef (store);
3753           gsi_remove (&gsi, true);
3754           release_defs (store);
3755           free_stmt_vec_info (store);
3756         }
3757     }
3758
3759   return is_store;
3760 }