/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "rtl.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds and UNKNOWN_LOCATION
   if not.  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;

  return node;
}

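/* A note on the operand count in vect_create_new_slp_node above: for an
   assignment the number of SLP operands is gimple_num_ops minus one (the
   lhs), and a COND_EXPR rhs gets one extra slot because its condition is
   a comparison whose two operands are tracked as separate SLP operands.
   E.g. for a hypothetical group member
     x_1 = a_2 < b_3 ? c_4 : d_5;
   gimple_num_ops is 4 and we reserve 4 operand slots: a_2, b_3, c_4 and
   d_5 (see also vect_get_and_check_slp_defs below).  */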

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */

static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

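/* As an illustration (hypothetical chain): if FIRST_STMT is the load of
   a[0] in an interleaving chain a[0], a[1], a[3], the place of the load
   of a[3] accumulates the GROUP_GAPs of the following elements,
   0 + 1 + 2 = 3, matching its index in the interleaved access; a stmt
   whose GROUP_FIRST_ELEMENT is a different chain yields -1.  */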

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  Return -1 on a fatal error, 1 if
   the error could be corrected by swapping operands of the operation, and
   0 if everything is OK.  */

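/* For illustration, a hypothetical group where swapping helps:
     S1: x_1 = a_1 + b_1;
     S2: x_2 = b_2 + a_2;
   If the defs found for S2's operands do not match those recorded from
   S1 and the operation is commutative, the code below retries S2 once
   with its operands swapped before reporting a mismatch (return 1).  */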
static int
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        {
          first_op_cond = true;
          commutative = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          if (i == 0 || i == 1)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
                                  swapped ? !i : i);
          else
            oprnd = gimple_op (stmt, first_op_idx + i - 1);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt:\n");
                return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}


/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation and are not of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */

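/* An example of the *MATCHES protocol (hypothetical group of four stmts
   where only the third uses a different operation): the function returns
   false with matches = { true, true, false, true }, which the caller can
   use e.g. to retry the failing stmts with swapped operands; a fatal
   failure is signalled by matches[0] == false.  */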
static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  gimple first_stmt = stmts[0], stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
      if (bb_vinfo
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.  */
              if (unrolling_factor > 1
                  && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                       && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                      /* If the group is split up then GROUP_GAP
                         isn't correct here, nor is GROUP_FIRST_ELEMENT.  */
                      || GROUP_SIZE (vinfo_for_stmt (stmt)) > group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
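
/* A worked (hypothetical) example of the two-operator support above: for
   a group { a[0] = b[0] + c[0];  a[1] = b[1] - c[1]; } both a PLUS_EXPR
   and a MINUS_EXPR vector are computed and the permute mask selects even
   lanes from the PLUS result and odd lanes from the MINUS result, i.e.
   sel = { 0, nunits + 1, 2, nunits + 3, ... } as built above, provided
   can_vec_perm_p confirms the target supports that mask.  */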

/* Recursively build an SLP tree starting from NODE.
   Fail (and return false) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return true.  *MATCHES indicates which scalar stmts (if any) failed
   to match the first one.  */

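/* A small (hypothetical) example of the recursion: for a group of two
   stores a[0] = b[0] + c[0]; a[1] = b[1] + c[1]; the root node holds the
   two stores, its single child holds the two PLUS_EXPR stmts, and that
   child has two grouped-load children (for b[] and for c[]) at which the
   recursion terminates.  */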
static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  gimple stmt;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  bool two_operators = false;
  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches,
                              &two_operators))
    return false;
  SLP_TREE_TWO_OPERATORS (*node) = two_operators;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                           stmt, i, &oprnds_info))
        {
        case 0:
          break;
        case -1:
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return false;
        case 1:
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return false;
      }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches,
                               npermutes, &this_tree_size, max_tree_size))
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build this node up from scalars instead.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ())
            {
              unsigned int j;
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (grandchild != NULL)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  *max_nunits = old_max_nunits;
                  loads->truncate (old_nloads);
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  vect_free_slp_tree (child);
                  SLP_TREE_CHILDREN (*node).quick_push (NULL);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ???  This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (bb_vinfo
          && !matches[0]
          /* ???  Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          SLP_TREE_CHILDREN (*node).quick_push (NULL);
          continue;
        }

      /* If the SLP build for operand zero failed and operands zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && !SLP_TREE_TWO_OPERATORS (*node)
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   tem, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  */
              for (j = 0; j < group_size; ++j)
                if (!matches[j])
                  {
                    gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                  }
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}

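/* Schematically, the dump produced above looks like

     node
        stmt 0 _8 = _4 + _6;
        stmt 1 _9 = _5 + _7;
     node
        ...

   with one "node" entry per SLP tree node, printed depth-first.  */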

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

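/* For illustration: with a hypothetical PERMUTATION { 2, 0, 1 } the stmt
   at index 0 moves to position 2, index 1 to position 0 and index 2 to
   position 1, giving tmp_stmts = { old[1], old[2], old[0] }; i.e. the
   permutation maps an old index to its new position, not the other way
   around.  */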
static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

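/* Hypothetical example: a load node whose scalar stmts read
   a[1], a[0], a[3], a[2] from an interleaving chain starting at a[0] has
   SLP_TREE_LOAD_PERMUTATION { 1, 0, 3, 2 }.  For a reduction any such
   permutation is fine as long as every load node uses the same one; for
   basic-block SLP we accept subchains (no permutation needed) or
   permutations the target can generate; for loop SLP we verify the
   permutation can be generated below.  */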
static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;

  return true;
}


1507 /* Find the last scalar stmt in the SLP group of stmts in NODE.  */
1508
1509 static gimple
1510 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1511 {
1512   gimple last = NULL, stmt;
1513
1514   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1515     {
1516       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1517       if (is_pattern_stmt_p (stmt_vinfo))
1518         last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1519       else
1520         last = get_later_stmt (stmt, last);
1521     }
1522
1523   return last;
1524 }
1525
1526 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1527
1528 static void
1529 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1530                          stmt_vector_for_cost *prologue_cost_vec,
1531                          stmt_vector_for_cost *body_cost_vec,
1532                          unsigned ncopies_for_cost)
1533 {
1534   unsigned i;
1535   slp_tree child;
1536   gimple stmt, s;
1537   stmt_vec_info stmt_info;
1538   tree lhs;
1539   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1540
1541   /* Recurse down the SLP tree.  */
1542   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1543     if (child)
1544       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1545                                body_cost_vec, ncopies_for_cost);
1546
1547   /* Look at the first scalar stmt to determine the cost.  */
1548   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1549   stmt_info = vinfo_for_stmt (stmt);
1550   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1551     {
1552       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1553         vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1554                                vect_uninitialized_def,
1555                                node, prologue_cost_vec, body_cost_vec);
1556       else
1557         {
1558           int i;
1559           gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1560           vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1561                                 node, prologue_cost_vec, body_cost_vec);
1562           /* If the load is permuted record the cost for the permutation.
1563              ???  Loads from multiple chains are let through here only
1564              for a single special case involving complex numbers where
1565              in the end no permutation is necessary.  */
1566           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1567             if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1568                  == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1569                 && vect_get_place_in_interleaving_chain
1570                      (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1571               {
1572                 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1573                                   stmt_info, 0, vect_body);
1574                 break;
1575               }
1576         }
1577     }
1578   else
1579     {
1580       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1581                         stmt_info, 0, vect_body);
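           /* For a node mixing two operators, e.g. the {+, -, +, -} pattern
              of an addsub sequence, both vector stmts are emitted and their
              results blended with one extra permute; account for both below.  */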
1582       if (SLP_TREE_TWO_OPERATORS (node))
1583         {
1584           record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1585                             stmt_info, 0, vect_body);
1586           record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1587                             stmt_info, 0, vect_body);
1588         }
1589     }
1590
1591   /* Scan operands and account for prologue cost of constants/externals.
1592      ???  This over-estimates cost for multiple uses and should be
1593      re-engineered.  */
1594   lhs = gimple_get_lhs (stmt);
1595   for (i = 0; i < gimple_num_ops (stmt); ++i)
1596     {
1597       tree def, op = gimple_op (stmt, i);
1598       gimple def_stmt;
1599       enum vect_def_type dt;
1600       if (!op || op == lhs)
1601         continue;
1602       if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
1603                               STMT_VINFO_BB_VINFO (stmt_info),
1604                               &def_stmt, &def, &dt))
1605         {
1606           /* Without looking at the actual initializer a vector of
1607              constants can be implemented as load from the constant pool.
1608              ???  We need to pass down stmt_info for a vector type
1609              even if it points to the wrong stmt.  */
1610           if (dt == vect_constant_def)
1611             record_stmt_cost (prologue_cost_vec, 1, vector_load,
1612                               stmt_info, 0, vect_prologue);
1613           else if (dt == vect_external_def)
1614             record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1615                               stmt_info, 0, vect_prologue);
1616         }
1617     }
1618 }
1619
1620 /* Compute the cost for the SLP instance INSTANCE.  */
1621
1622 static void
1623 vect_analyze_slp_cost (slp_instance instance, void *data)
1624 {
1625   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1626   unsigned ncopies_for_cost;
1627   stmt_info_for_cost *si;
1628   unsigned i;
1629
1630   /* Calculate the number of vector stmts to create based on the unrolling
1631      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1632      GROUP_SIZE / NUNITS otherwise).  */
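       /* E.g. GROUP_SIZE == 8 with 4-element vectors gives
          least_common_multiple (4, 8) / 4 == 2 vector stmts to cover the
          group.  */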
1633   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1634   slp_tree node = SLP_INSTANCE_TREE (instance);
1635   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1636   /* Adjust the group_size by the vectorization factor which is always one
1637      for basic-block vectorization.  */
1638   if (STMT_VINFO_LOOP_VINFO (stmt_info))
1639     group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1640   unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1641   /* For reductions look at a reduction operand in case the reduction
1642      operation is widening like DOT_PROD or SAD.  */
1643   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1644     {
1645       gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1646       switch (gimple_assign_rhs_code (stmt))
1647         {
1648         case DOT_PROD_EXPR:
1649         case SAD_EXPR:
1650           nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1651                                 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1652           break;
1653         default:;
1654         }
1655     }
1656   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1657
1658   prologue_cost_vec.create (10);
1659   body_cost_vec.create (10);
1660   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1661                            &prologue_cost_vec, &body_cost_vec,
1662                            ncopies_for_cost);
1663
1664   /* Record the prologue costs, which were delayed until we were
1665      sure that SLP was successful.  */
1666   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1667     {
1668       struct _stmt_vec_info *stmt_info
1669         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1670       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1671                             si->misalign, vect_prologue);
1672     }
1673
1674   /* Record the instance's instructions in the target cost model.  */
1675   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1676     {
1677       struct _stmt_vec_info *stmt_info
1678         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1679       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1680                             si->misalign, vect_body);
1681     }
1682
1683   prologue_cost_vec.release ();
1684   body_cost_vec.release ();
1685 }
1686
1687 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1688    vect_build_slp_tree to build a tree of packed stmts if possible.
1689    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1690
1691 static bool
1692 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1693                            gimple stmt, unsigned max_tree_size)
1694 {
1695   slp_instance new_instance;
1696   slp_tree node;
1697   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1698   unsigned int unrolling_factor = 1, nunits;
1699   tree vectype, scalar_type = NULL_TREE;
1700   gimple next;
1701   unsigned int vectorization_factor = 0;
1702   int i;
1703   unsigned int max_nunits = 0;
1704   vec<slp_tree> loads;
1705   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1706   vec<gimple> scalar_stmts;
1707
1708   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1709     {
1710       if (dr)
1711         {
1712           scalar_type = TREE_TYPE (DR_REF (dr));
1713           vectype = get_vectype_for_scalar_type (scalar_type);
1714         }
1715       else
1716         {
1717           gcc_assert (loop_vinfo);
1718           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1719         }
1720
1721       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1722     }
1723   else
1724     {
1725       gcc_assert (loop_vinfo);
1726       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1727       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1728     }
1729
1730   if (!vectype)
1731     {
1732       if (dump_enabled_p ())
1733         {
1734           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1735                            "Build SLP failed: unsupported data-type ");
1736           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1737           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1738         }
1739
1740       return false;
1741     }
1742
1743   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1744   if (loop_vinfo)
1745     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1746   else
1747     vectorization_factor = nunits;
1748
1749   /* Calculate the unrolling factor.  */
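       /* E.g. NUNITS == 4 and GROUP_SIZE == 6 give
          least_common_multiple (4, 6) / 6 == 2, i.e. two copies of the group
          are needed to fill whole vectors.  */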
1750   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1751   if (unrolling_factor != 1 && !loop_vinfo)
1752     {
1753       if (dump_enabled_p ())
1754         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1755                          "Build SLP failed: unrolling required in basic"
1756                          " block SLP\n");
1757
1758       return false;
1759     }
1760
1761   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1762   scalar_stmts.create (group_size);
1763   next = stmt;
1764   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1765     {
1766       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1767       while (next)
1768         {
1769           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1770               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1771             scalar_stmts.safe_push (
1772                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1773           else
1774             scalar_stmts.safe_push (next);
1775           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1776         }
1777       /* Mark the first element of the reduction chain as reduction to properly
1778          transform the node.  In the reduction analysis phase only the last
1779          element of the chain is marked as reduction.  */
1780       if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1781         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1782     }
1783   else
1784     {
1785       /* Collect reduction statements.  */
1786       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1787       for (i = 0; reductions.iterate (i, &next); i++)
1788         scalar_stmts.safe_push (next);
1789     }
1790
1791   node = vect_create_new_slp_node (scalar_stmts);
1792
1793   loads.create (group_size);
1794
1795   /* Build the tree for the SLP instance.  */
1796   bool *matches = XALLOCAVEC (bool, group_size);
1797   unsigned npermutes = 0;
1798   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1799                            &max_nunits, &loads,
1800                            vectorization_factor, matches, &npermutes, NULL,
1801                            max_tree_size))
1802     {
1803       /* Calculate the unrolling factor based on the smallest type.  */
1804       if (max_nunits > nunits)
1805         unrolling_factor = least_common_multiple (max_nunits, group_size)
1806                            / group_size;
1807
1808       if (unrolling_factor != 1 && !loop_vinfo)
1809         {
1810           if (dump_enabled_p ())
1811             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1812                              "Build SLP failed: unrolling required in basic"
1813                              " block SLP\n");
1814           vect_free_slp_tree (node);
1815           loads.release ();
1816           return false;
1817         }
1818
1819       /* Create a new SLP instance.  */
1820       new_instance = XNEW (struct _slp_instance);
1821       SLP_INSTANCE_TREE (new_instance) = node;
1822       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1823       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1824       SLP_INSTANCE_LOADS (new_instance) = loads;
1825
1826       /* Compute the load permutation.  */
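           /* E.g. a load node reading b[1], b[0] from a chain whose first
              element is b[0] gets the permutation {1, 0}; nodes that read in
              chain order keep no permutation vector at all.  */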
1827       slp_tree load_node;
1828       bool loads_permuted = false;
1829       FOR_EACH_VEC_ELT (loads, i, load_node)
1830         {
1831           vec<unsigned> load_permutation;
1832           int j;
1833           gimple load, first_stmt;
1834           bool this_load_permuted = false;
1835           load_permutation.create (group_size);
1836           first_stmt = GROUP_FIRST_ELEMENT
1837               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1838           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1839             {
1840               int load_place
1841                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1842               gcc_assert (load_place != -1);
1843               if (load_place != j)
1844                 this_load_permuted = true;
1845               load_permutation.safe_push (load_place);
1846             }
1847           if (!this_load_permuted)
1848             {
1849               load_permutation.release ();
1850               continue;
1851             }
1852           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1853           loads_permuted = true;
1854         }
1855
1856       if (loads_permuted)
1857         {
1858           if (!vect_supported_load_permutation_p (new_instance))
1859             {
1860               if (dump_enabled_p ())
1861                 {
1862                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1863                                    "Build SLP failed: unsupported load "
1864                                    "permutation ");
1865                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1866                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1867                 }
1868               vect_free_slp_instance (new_instance);
1869               return false;
1870             }
1871         }
1872
1873
1874       if (loop_vinfo)
1875         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1876       else
1877         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1878
1879       if (dump_enabled_p ())
1880         vect_print_slp_tree (MSG_NOTE, node);
1881
1882       return true;
1883     }
1884
1885   /* Failed to SLP.  */
1886   /* Free the allocated memory.  */
1887   vect_free_slp_tree (node);
1888   loads.release ();
1889
1890   return false;
1891 }
1892
1893
1894 /* Check if there are stmts in the loop that can be vectorized using SLP.
1895    Build SLP trees of packed scalar stmts if SLP is possible.  */
1896
1897 bool
1898 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1899                   unsigned max_tree_size)
1900 {
1901   unsigned int i;
1902   vec<gimple> grouped_stores;
1903   vec<gimple> reductions = vNULL;
1904   vec<gimple> reduc_chains = vNULL;
1905   gimple first_element;
1906   bool ok = false;
1907
1908   if (dump_enabled_p ())
1909     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1910
1911   if (loop_vinfo)
1912     {
1913       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1914       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1915       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1916     }
1917   else
1918     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1919
1920   /* Find SLP sequences starting from groups of grouped stores.  */
1921   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1922     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1923                                    max_tree_size))
1924       ok = true;
1925
1926   if (reduc_chains.length () > 0)
1927     {
1928       /* Find SLP sequences starting from reduction chains.  */
1929       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1930         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1931                                        max_tree_size))
1932           ok = true;
1933         else
1934           return false;
1935
1936       /* Don't try to vectorize SLP reductions if a reduction chain was
1937          detected.  */
1938       return ok;
1939     }
1940
1941   /* Find SLP sequences starting from groups of reductions.  */
1942   if (reductions.length () > 1
1943       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1944                                     max_tree_size))
1945     ok = true;
1946
1947   return true;
1948 }
1949
1950
1951 /* For each possible SLP instance decide whether to SLP it and calculate
1952    the overall unrolling factor needed to SLP the loop.  Return TRUE if we
1953    decided to SLP at least one instance.  */
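     /* The overall factor recorded below is the maximum over all instances,
        e.g. instances with unrolling factors 2 and 4 yield an SLP unrolling
        factor of 4.  */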
1954
1955 bool
1956 vect_make_slp_decision (loop_vec_info loop_vinfo)
1957 {
1958   unsigned int i, unrolling_factor = 1;
1959   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1960   slp_instance instance;
1961   int decided_to_slp = 0;
1962
1963   if (dump_enabled_p ())
1964     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1965                      "\n");
1966
1967   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1968     {
1969       /* FORNOW: SLP if you can.  */
1970       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1971         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1972
1973       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1974          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1975          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1976       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1977       decided_to_slp++;
1978     }
1979
1980   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1981
1982   if (decided_to_slp && dump_enabled_p ())
1983     dump_printf_loc (MSG_NOTE, vect_location,
1984                      "Decided to SLP %d instances. Unrolling factor %d\n",
1985                      decided_to_slp, unrolling_factor);
1986
1987   return (decided_to_slp > 0);
1988 }
1989
1990
1991 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1992    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
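     /* For instance, if a pure-SLP stmt t = a + b has a use of t in a stmt
        that belongs to no SLP instance, t must also be produced by loop-based
        vectorization; it is marked hybrid here and the marking is propagated
        down to the SLP children computing its operands.  */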
1993
1994 static void
1995 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
1996 {
1997   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
1998   imm_use_iterator imm_iter;
1999   gimple use_stmt;
2000   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2001   slp_tree child;
2002   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2003   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2004   int j;
2005
2006   /* Propagate hybrid down the SLP tree.  */
2007   if (stype == hybrid)
2008     ;
2009   else if (HYBRID_SLP_STMT (stmt_vinfo))
2010     stype = hybrid;
2011   else
2012     {
2013       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
2014       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2015       /* We always get the pattern stmt here, but for immediate
2016          uses we have to use the LHS of the original stmt.  */
2017       gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2018       if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
2019         stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2020       if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2021         FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
2022           {
2023             if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2024               continue;
2025             use_vinfo = vinfo_for_stmt (use_stmt);
2026             if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2027                 && STMT_VINFO_RELATED_STMT (use_vinfo))
2028               use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2029             if (!STMT_SLP_TYPE (use_vinfo)
2030                 && (STMT_VINFO_RELEVANT (use_vinfo)
2031                     || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2032                 && !(gimple_code (use_stmt) == GIMPLE_PHI
2033                      && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2034               stype = hybrid;
2035           }
2036     }
2037
2038   if (stype == hybrid)
2039     {
2040       if (dump_enabled_p ())
2041         {
2042           dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2043           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2044         }
2045       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2046     }
2047
2048   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2049     if (child)
2050       vect_detect_hybrid_slp_stmts (child, i, stype);
2051 }
2052
2053 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2054
2055 static tree
2056 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2057 {
2058   walk_stmt_info *wi = (walk_stmt_info *)data;
2059   struct loop *loopp = (struct loop *)wi->info;
2060
2061   if (wi->is_lhs)
2062     return NULL_TREE;
2063
2064   if (TREE_CODE (*tp) == SSA_NAME
2065       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2066     {
2067       gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
2068       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2069           && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2070         {
2071           if (dump_enabled_p ())
2072             {
2073               dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2074               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2075             }
2076           STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2077         }
2078     }
2079
2080   return NULL_TREE;
2081 }
2082
2083 static tree
2084 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2085                           walk_stmt_info *)
2086 {
2087   /* If the stmt is in an SLP instance then this isn't a reason
2088      to mark use definitions in other SLP instances as hybrid.  */
2089   if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2090     *handled = true;
2091   return NULL_TREE;
2092 }
2093
2094 /* Find stmts that must be both vectorized and SLPed.  */
2095
2096 void
2097 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2098 {
2099   unsigned int i;
2100   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2101   slp_instance instance;
2102
2103   if (dump_enabled_p ())
2104     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2105                      "\n");
2106
2107   /* First walk all pattern stmts in the loop and mark defs of their uses
2108      as hybrid, because immediate uses in pattern stmts are not recorded.  */
2109   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2110     {
2111       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2112       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2113            gsi_next (&gsi))
2114         {
2115           gimple stmt = gsi_stmt (gsi);
2116           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2117           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2118             {
2119               walk_stmt_info wi;
2120               memset (&wi, 0, sizeof (wi));
2121               wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2122               gimple_stmt_iterator gsi2
2123                 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2124               walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2125                                 vect_detect_hybrid_slp_1, &wi);
2126               walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2127                                vect_detect_hybrid_slp_2,
2128                                vect_detect_hybrid_slp_1, &wi);
2129             }
2130         }
2131     }
2132
2133   /* Then walk the SLP instance trees marking stmts with uses in
2134      non-SLP stmts as hybrid, also propagating hybrid down the
2135      SLP tree, collecting the above info on-the-fly.  */
2136   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2137     {
2138       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2139         vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2140                                       i, pure_slp);
2141     }
2142 }
2143
2144
2145 /* Create and initialize a new bb_vec_info struct for BB, as well as
2146    stmt_vec_info structs for all the stmts in it.  */
2147
2148 static bb_vec_info
2149 new_bb_vec_info (basic_block bb)
2150 {
2151   bb_vec_info res = NULL;
2152   gimple_stmt_iterator gsi;
2153
2154   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2155   BB_VINFO_BB (res) = bb;
2156
2157   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2158     {
2159       gimple stmt = gsi_stmt (gsi);
2160       gimple_set_uid (stmt, 0);
2161       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
2162     }
2163
2164   BB_VINFO_GROUPED_STORES (res).create (10);
2165   BB_VINFO_SLP_INSTANCES (res).create (2);
2166   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2167
2168   bb->aux = res;
2169   return res;
2170 }
2171
2172
2173 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2174    stmts in the basic block.  */
2175
2176 static void
2177 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2178 {
2179   vec<slp_instance> slp_instances;
2180   slp_instance instance;
2181   basic_block bb;
2182   gimple_stmt_iterator si;
2183   unsigned i;
2184
2185   if (!bb_vinfo)
2186     return;
2187
2188   bb = BB_VINFO_BB (bb_vinfo);
2189
2190   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2191     {
2192       gimple stmt = gsi_stmt (si);
2193       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2194
2195       if (stmt_info)
2196         /* Free stmt_vec_info.  */
2197         free_stmt_vec_info (stmt);
2198     }
2199
2200   vect_destroy_datarefs (NULL, bb_vinfo);
2201   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2202   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2203   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2204   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2205     vect_free_slp_instance (instance);
2206   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2207   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2208   free (bb_vinfo);
2209   bb->aux = NULL;
2210 }
2211
2212
2213 /* Analyze statements contained in SLP tree node after recursively analyzing
2214    the subtree.  Return TRUE if the operations are supported.  */
2215
2216 static bool
2217 vect_slp_analyze_node_operations (slp_tree node)
2218 {
2219   bool dummy;
2220   int i;
2221   gimple stmt;
2222   slp_tree child;
2223
2224   if (!node)
2225     return true;
2226
2227   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2228     if (!vect_slp_analyze_node_operations (child))
2229       return false;
2230
2231   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2232     {
2233       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2234       gcc_assert (stmt_info);
2235       gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2236
2237       if (!vect_analyze_stmt (stmt, &dummy, node))
2238         return false;
2239     }
2240
2241   return true;
2242 }
2243
2244
2245 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
2246    operations are supported.  */
2247
2248 bool
2249 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2250 {
2251   slp_instance instance;
2252   int i;
2253
2254   if (dump_enabled_p ())
2255     dump_printf_loc (MSG_NOTE, vect_location,
2256                      "=== vect_slp_analyze_operations ===\n");
2257
2258   for (i = 0; slp_instances.iterate (i, &instance); )
2259     {
2260       if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2261         {
2262           dump_printf_loc (MSG_NOTE, vect_location,
2263                            "removing SLP instance operations starting from: ");
2264           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2265                             SLP_TREE_SCALAR_STMTS
2266                               (SLP_INSTANCE_TREE (instance))[0], 0);
2267           vect_free_slp_instance (instance);
2268           slp_instances.ordered_remove (i);
2269         }
2270       else
2271         {
2272           /* Compute the costs of the SLP instance.  */
2273           vect_analyze_slp_cost (instance, data);
2274           i++;
2275         }
2276     }
2277
2278   if (!slp_instances.length ())
2279     return false;
2280
2281   return true;
2282 }
2283
2284
2285 /* Compute the scalar cost of the SLP node NODE and its children
2286    and return it.  Do not account defs that are marked in LIFE and
2287    update LIFE according to uses of NODE.  */
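     /* For example, if lane 1 of NODE computes t = x + y and t is also used
        by a stmt outside the basic block, lane 1 stays live: neither that
        stmt nor the lane-1 stmts of the children count as removable scalar
        cost.  */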
2288
2289 static unsigned
2290 vect_bb_slp_scalar_cost (basic_block bb,
2291                          slp_tree node, vec<bool, va_heap> *life)
2292 {
2293   unsigned scalar_cost = 0;
2294   unsigned i;
2295   gimple stmt;
2296   slp_tree child;
2297
2298   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2299     {
2300       unsigned stmt_cost;
2301       ssa_op_iter op_iter;
2302       def_operand_p def_p;
2303       stmt_vec_info stmt_info;
2304
2305       if ((*life)[i])
2306         continue;
2307
2308       /* If there is a non-vectorized use of the defs then the scalar
2309          stmt is kept live in which case we do not account it or any
2310          required defs in the SLP children in the scalar cost.  This
2311          way we make the vectorization more costly when compared to
2312          the scalar cost.  */
2313       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2314         {
2315           imm_use_iterator use_iter;
2316           gimple use_stmt;
2317           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2318             if (!is_gimple_debug (use_stmt)
2319                 && (gimple_code (use_stmt) == GIMPLE_PHI
2320                     || gimple_bb (use_stmt) != bb
2321                     || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2322               {
2323                 (*life)[i] = true;
2324                 BREAK_FROM_IMM_USE_STMT (use_iter);
2325               }
2326         }
2327       if ((*life)[i])
2328         continue;
2329
2330       stmt_info = vinfo_for_stmt (stmt);
2331       if (STMT_VINFO_DATA_REF (stmt_info))
2332         {
2333           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2334             stmt_cost = vect_get_stmt_cost (scalar_load);
2335           else
2336             stmt_cost = vect_get_stmt_cost (scalar_store);
2337         }
2338       else
2339         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2340
2341       scalar_cost += stmt_cost;
2342     }
2343
2344   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2345     if (child)
2346       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2347
2348   return scalar_cost;
2349 }
2350
2351 /* Check if vectorization of the basic block is profitable.  */
2352
2353 static bool
2354 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2355 {
2356   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2357   slp_instance instance;
2358   int i;
2359   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2360   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2361
2362   /* Calculate scalar cost.  */
2363   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2364     {
2365       auto_vec<bool, 20> life;
2366       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2367       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2368                                               SLP_INSTANCE_TREE (instance),
2369                                               &life);
2370     }
2371
2372   /* Complete the target-specific cost calculation.  */
2373   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2374                &vec_inside_cost, &vec_epilogue_cost);
2375
2376   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2377
2378   if (dump_enabled_p ())
2379     {
2380       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2381       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2382                    vec_inside_cost);
2383       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2384       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2385       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2386     }
2387
2388   /* Vectorization is profitable if its cost is less than the cost of scalar
2389      version.  */
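       /* E.g. vec_inside_cost == 8 plus vec_outside_cost == 2 beats a
          scalar_cost of 12, while a 12 vs. 12 tie is rejected since strict
          improvement is required.  */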
2390   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2391     return false;
2392
2393   return true;
2394 }
2395
2396 /* Check if the basic block can be vectorized.  */
2397
2398 static bb_vec_info
2399 vect_slp_analyze_bb_1 (basic_block bb)
2400 {
2401   bb_vec_info bb_vinfo;
2402   vec<slp_instance> slp_instances;
2403   slp_instance instance;
2404   int i;
2405   int min_vf = 2;
2406   unsigned n_stmts = 0;
2407
2408   bb_vinfo = new_bb_vec_info (bb);
2409   if (!bb_vinfo)
2410     return NULL;
2411
2412   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2413     {
2414       if (dump_enabled_p ())
2415         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2416                          "not vectorized: unhandled data-ref in basic "
2417                          "block.\n");
2418
2419       destroy_bb_vec_info (bb_vinfo);
2420       return NULL;
2421     }
2422
2423   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2424     {
2425       if (dump_enabled_p ())
2426         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2427                          "not vectorized: not enough data-refs in "
2428                          "basic block.\n");
2429
2430       destroy_bb_vec_info (bb_vinfo);
2431       return NULL;
2432     }
2433
2434   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2435     {
2436       if (dump_enabled_p ())
2437         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2438                          "not vectorized: unhandled data access in "
2439                          "basic block.\n");
2440
2441       destroy_bb_vec_info (bb_vinfo);
2442       return NULL;
2443     }
2444
2445   vect_pattern_recog (NULL, bb_vinfo);
2446
2447   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2448     {
2449       if (dump_enabled_p ())
2450         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2451                          "not vectorized: bad data alignment in basic "
2452                          "block.\n");
2453
2454       destroy_bb_vec_info (bb_vinfo);
2455       return NULL;
2456     }
2457
2458   /* Check the SLP opportunities in the basic block, analyze and build SLP
2459      trees.  */
2460   if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2461     {
2462       if (dump_enabled_p ())
2463         {
2464           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2465                            "Failed to SLP the basic block.\n");
2466           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2467                            "not vectorized: failed to find SLP opportunities "
2468                            "in basic block.\n");
2469         }
2470
2471       destroy_bb_vec_info (bb_vinfo);
2472       return NULL;
2473     }
2474
2475   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2476
2477   /* Mark all the statements that we want to vectorize as pure SLP and
2478      relevant.  */
2479   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2480     {
2481       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2482       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2483     }
2484
2485   /* Mark all the statements that we do not want to vectorize.  */
2486   for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2487        !gsi_end_p (gsi); gsi_next (&gsi))
2488     {
2489       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2490       if (STMT_SLP_TYPE (vinfo) != pure_slp)
2491         STMT_VINFO_VECTORIZABLE (vinfo) = false;
2492     }
2493
2494   /* Analyze dependences.  At this point all stmts not participating in
2495      vectorization have to be marked.  Dependence analysis assumes
2496      that we either vectorize all SLP instances or none at all.  */
2497   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2498     {
2499       if (dump_enabled_p ())
2500         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501                          "not vectorized: unhandled data dependence "
2502                          "in basic block.\n");
2503
2504       destroy_bb_vec_info (bb_vinfo);
2505       return NULL;
2506     }
2507
2508   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2509     {
2510       if (dump_enabled_p ())
2511         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512                          "not vectorized: unsupported alignment in basic "
2513                          "block.\n");
2514       destroy_bb_vec_info (bb_vinfo);
2515       return NULL;
2516     }
2517
2518   if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2519                                     BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2520     {
2521       if (dump_enabled_p ())
2522         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2523                          "not vectorized: bad operation in basic block.\n");
2524
2525       destroy_bb_vec_info (bb_vinfo);
2526       return NULL;
2527     }
2528
2529   /* Cost model: check if the vectorization is worthwhile.  */
2530   if (!unlimited_cost_model (NULL)
2531       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2532     {
2533       if (dump_enabled_p ())
2534         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2535                          "not vectorized: vectorization is not "
2536                          "profitable.\n");
2537
2538       destroy_bb_vec_info (bb_vinfo);
2539       return NULL;
2540     }
2541
2542   if (dump_enabled_p ())
2543     dump_printf_loc (MSG_NOTE, vect_location,
2544                      "Basic block will be vectorized using SLP\n");
2545
2546   return bb_vinfo;
2547 }
2548
2549
2550 bb_vec_info
2551 vect_slp_analyze_bb (basic_block bb)
2552 {
2553   bb_vec_info bb_vinfo;
2554   int insns = 0;
2555   gimple_stmt_iterator gsi;
2556   unsigned int vector_sizes;
2557
2558   if (dump_enabled_p ())
2559     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2560
2561   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2562     {
2563       gimple stmt = gsi_stmt (gsi);
2564       if (!is_gimple_debug (stmt)
2565           && !gimple_nop_p (stmt)
2566           && gimple_code (stmt) != GIMPLE_LABEL)
2567         insns++;
2568     }
2569
2570   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2571     {
2572       if (dump_enabled_p ())
2573         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2574                          "not vectorized: too many instructions in "
2575                          "basic block.\n");
2576
2577       return NULL;
2578     }
2579
2580   /* Autodetect first vector size we try.  */
2581   current_vector_size = 0;
2582   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
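       /* E.g. a target advertising 32-, 16- and 8-byte vectors returns a
          mask of those sizes; after each failed attempt the loop below
          clears the size just tried and retries with the largest size that
          remains.  */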
2583
2584   while (1)
2585     {
2586       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2587       if (bb_vinfo)
2588         return bb_vinfo;
2589
2590       destroy_bb_vec_info (bb_vinfo);
2591
2592       vector_sizes &= ~current_vector_size;
2593       if (vector_sizes == 0
2594           || current_vector_size == 0)
2595         return NULL;
2596
2597       /* Try the next biggest vector size.  */
2598       current_vector_size = 1 << floor_log2 (vector_sizes);
2599       if (dump_enabled_p ())
2600         dump_printf_loc (MSG_NOTE, vect_location,
2601                          "***** Re-trying analysis with "
2602                          "vector size %d\n", current_vector_size);
2603     }
2604 }
2605
2606
2607 /* For constant and loop invariant defs of SLP_NODE this function returns
2608    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2609    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2610    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2611    REDUC_INDEX is the index of the reduction operand in the statements, unless
2612    it is -1.  */
2613
2614 static void
2615 vect_get_constant_vectors (tree op, slp_tree slp_node,
2616                            vec<tree> *vec_oprnds,
2617                            unsigned int op_num, unsigned int number_of_vectors,
2618                            int reduc_index)
2619 {
2620   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2621   gimple stmt = stmts[0];
2622   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2623   unsigned nunits;
2624   tree vec_cst;
2625   tree *elts;
2626   unsigned j, number_of_places_left_in_vector;
2627   tree vector_type;
2628   tree vop;
2629   int group_size = stmts.length ();
2630   unsigned int vec_num, i;
2631   unsigned number_of_copies = 1;
2632   vec<tree> voprnds;
2633   voprnds.create (number_of_vectors);
2634   bool constant_p, is_store;
2635   tree neutral_op = NULL;
2636   enum tree_code code = gimple_expr_code (stmt);
2637   gimple def_stmt;
2638   struct loop *loop;
2639   gimple_seq ctor_seq = NULL;
2640
2641   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2642   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2643
2644   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2645       && reduc_index != -1)
2646     {
2647       op_num = reduc_index;
2648       op = gimple_op (stmt, op_num + 1);
2649       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2650          we need either neutral operands or the original operands.  See
2651          get_initial_def_for_reduction() for details.  */
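           /* E.g. a PLUS_EXPR reduction uses 0 (0.0 for floats) as the
              neutral element, MULT_EXPR uses 1 and BIT_AND_EXPR all-ones,
              as set up in the switch below.  */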
2652       switch (code)
2653         {
2654           case WIDEN_SUM_EXPR:
2655           case DOT_PROD_EXPR:
2656           case SAD_EXPR:
2657           case PLUS_EXPR:
2658           case MINUS_EXPR:
2659           case BIT_IOR_EXPR:
2660           case BIT_XOR_EXPR:
2661              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2662                neutral_op = build_real (TREE_TYPE (op), dconst0);
2663              else
2664                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2665
2666              break;
2667
2668           case MULT_EXPR:
2669              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2670                neutral_op = build_real (TREE_TYPE (op), dconst1);
2671              else
2672                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2673
2674              break;
2675
2676           case BIT_AND_EXPR:
2677             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2678             break;
2679
2680           /* For MIN/MAX we don't have an easy neutral operand but
2681              the initial values can be used fine here.  Only for
2682              a reduction chain do we have to force a neutral element.  */
2683           case MAX_EXPR:
2684           case MIN_EXPR:
2685             if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2686               neutral_op = NULL;
2687             else
2688               {
2689                 def_stmt = SSA_NAME_DEF_STMT (op);
2690                 loop = (gimple_bb (stmt))->loop_father;
2691                 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2692                                                     loop_preheader_edge (loop));
2693               }
2694             break;
2695
2696           default:
2697             gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2698             neutral_op = NULL;
2699         }
2700     }
2701
2702   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2703     {
2704       is_store = true;
2705       op = gimple_assign_rhs1 (stmt);
2706     }
2707   else
2708     is_store = false;
2709
2710   gcc_assert (op);
2711
2712   if (CONSTANT_CLASS_P (op))
2713     constant_p = true;
2714   else
2715     constant_p = false;
2716
2717   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2718      created vectors. It is greater than 1 if unrolling is performed.
2719
2720      For example, we have two scalar operands, s1 and s2 (e.g., group of
2721      strided accesses of size two), while NUNITS is four (i.e., four scalars
2722      of this type can be packed in a vector).  The output vector will contain
2723      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2724      will be 2).
2725
2726      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2727      containing the operands.
2728
2729      For example, NUNITS is four as before, and the group size is 8
2730      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2731      {s5, s6, s7, s8}.  */
2732
2733   number_of_copies = nunits * number_of_vectors / group_size;
2734
2735   number_of_places_left_in_vector = nunits;
2736   elts = XALLOCAVEC (tree, nunits);
2737   bool place_after_defs = false;
2738   for (j = 0; j < number_of_copies; j++)
2739     {
2740       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2741         {
2742           if (is_store)
2743             op = gimple_assign_rhs1 (stmt);
2744           else
2745             {
2746               switch (code)
2747                 {
2748                   case COND_EXPR:
2749                     if (op_num == 0 || op_num == 1)
2750                       {
2751                         tree cond = gimple_assign_rhs1 (stmt);
2752                         op = TREE_OPERAND (cond, op_num);
2753                       }
2754                     else
2755                       {
2756                         if (op_num == 2)
2757                           op = gimple_assign_rhs2 (stmt);
2758                         else
2759                           op = gimple_assign_rhs3 (stmt);
2760                       }
2761                     break;
2762
2763                   case CALL_EXPR:
2764                     op = gimple_call_arg (stmt, op_num);
2765                     break;
2766
2767                   case LSHIFT_EXPR:
2768                   case RSHIFT_EXPR:
2769                   case LROTATE_EXPR:
2770                   case RROTATE_EXPR:
2771                     op = gimple_op (stmt, op_num + 1);
2772                     /* Unlike the other binary operators, shifts/rotates take
2773                        an int shift count rather than one of the same type as
2774                        the lhs, so make sure the scalar has the right type if
2775                        we are dealing with vectors of
2776                        long long/long/short/char.  */
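                         /* E.g. an int shift count of 3 becomes (long long) 3
                            when the vector elements are long long, keeping the
                            element array ELTS uniformly typed.  */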
2777                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2778                       op = fold_convert (TREE_TYPE (vector_type), op);
2779                     break;
2780
2781                   default:
2782                     op = gimple_op (stmt, op_num + 1);
2783                     break;
2784                 }
2785             }
2786
2787           if (reduc_index != -1)
2788             {
2789               loop = (gimple_bb (stmt))->loop_father;
2790               def_stmt = SSA_NAME_DEF_STMT (op);
2791
2792               gcc_assert (loop);
2793
2794               /* Get the def before the loop.  In reduction chain we have only
2795                  one initial value.  */
2796               if ((j != (number_of_copies - 1)
2797                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2798                        && i != 0))
2799                   && neutral_op)
2800                 op = neutral_op;
2801               else
2802                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2803                                             loop_preheader_edge (loop));
2804             }
2805
2806           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2807           number_of_places_left_in_vector--;
2808           tree orig_op = op;
2809           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2810             {
2811               if (CONSTANT_CLASS_P (op))
2812                 {
2813                   op = fold_unary (VIEW_CONVERT_EXPR,
2814                                    TREE_TYPE (vector_type), op);
2815                   gcc_assert (op && CONSTANT_CLASS_P (op));
2816                 }
2817               else
2818                 {
2819                   tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
2820                   gimple init_stmt;
2821                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
2822                   init_stmt
2823                     = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
2824                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2825                   op = new_temp;
2826                 }
2827             }
2828           elts[number_of_places_left_in_vector] = op;
2829           if (!CONSTANT_CLASS_P (op))
2830             constant_p = false;
2831           if (TREE_CODE (orig_op) == SSA_NAME
2832               && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2833               && STMT_VINFO_BB_VINFO (stmt_vinfo)
2834               && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2835                   == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2836             place_after_defs = true;
2837
2838           if (number_of_places_left_in_vector == 0)
2839             {
2840               number_of_places_left_in_vector = nunits;
2841
2842               if (constant_p)
2843                 vec_cst = build_vector (vector_type, elts);
2844               else
2845                 {
2846                   vec<constructor_elt, va_gc> *v;
2847                   unsigned k;
2848                   vec_alloc (v, nunits);
2849                   for (k = 0; k < nunits; ++k)
2850                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2851                   vec_cst = build_constructor (vector_type, v);
2852                 }
2853               tree init;
2854               gimple_stmt_iterator gsi;
2855               if (place_after_defs)
2856                 {
2857                   gsi = gsi_for_stmt
2858                           (vect_find_last_scalar_stmt_in_slp (slp_node));
2859                   init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2860                 }
2861               else
2862                 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
2863               if (ctor_seq != NULL)
2864                 {
2865                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
2866                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2867                                                         GSI_SAME_STMT);
2868                   ctor_seq = NULL;
2869                 }
2870               voprnds.quick_push (init);
2871               place_after_defs = false;
2872             }
2873         }
2874     }
2875
2876   /* Since the vectors are created in the reverse order, we have to
2877      reverse them here.  */
2878   vec_num = voprnds.length ();
2879   for (j = vec_num; j != 0; j--)
2880     {
2881       vop = voprnds[j - 1];
2882       vec_oprnds->quick_push (vop);
2883     }
2884
2885   voprnds.release ();
2886
2887   /* If VF is greater than the unrolling factor needed for the SLP
2888      group of stmts, NUMBER_OF_VECTORS to be created is greater than
2889      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2890      to replicate the vectors.  */
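       /* E.g. if four vectors are requested but only two were created, the
          two are pushed again in order; for reductions with a neutral
          element, vectors of that element are pushed instead.  */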
2891   while (number_of_vectors > vec_oprnds->length ())
2892     {
2893       tree neutral_vec = NULL;
2894
2895       if (neutral_op)
2896         {
2897           if (!neutral_vec)
2898             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2899
2900           vec_oprnds->quick_push (neutral_vec);
2901         }
2902       else
2903         {
2904           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2905             vec_oprnds->quick_push (vop);
2906         }
2907     }
2908 }
2909
2910
2911 /* Get vectorized definitions from SLP_NODE that contains corresponding
2912    vectorized def-stmts.  */
2913
2914 static void
2915 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2916 {
2917   tree vec_oprnd;
2918   gimple vec_def_stmt;
2919   unsigned int i;
2920
2921   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2922
2923   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2924     {
2925       gcc_assert (vec_def_stmt);
2926       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2927       vec_oprnds->quick_push (vec_oprnd);
2928     }
2929 }
2930
2931
2932 /* Get vectorized definitions for SLP_NODE.
2933    If the scalar definitions are loop invariants or constants, collect them
2934    and call vect_get_constant_vectors () to create vector stmts.
2935    Otherwise, the def-stmts must already be vectorized and the vectorized
2936    stmts must be stored in the corresponding child of SLP_NODE, and we call
2937    vect_get_slp_vect_defs () to retrieve them.  */
2938
2939 void
2940 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2941                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2942 {
2943   gimple first_stmt;
2944   int number_of_vects = 0, i;
2945   unsigned int child_index = 0;
2946   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2947   slp_tree child = NULL;
2948   vec<tree> vec_defs;
2949   tree oprnd;
2950   bool vectorized_defs;
2951
2952   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2953   FOR_EACH_VEC_ELT (ops, i, oprnd)
2954     {
2955       /* For each operand we check whether it has vectorized definitions in
2956          a child node or whether we need to create them (for invariants and
2957          constants).  We check if the LHS of the first stmt of the next child
2958          matches OPRND.  If it does, we found the correct child.  Otherwise,
2959          we call vect_get_constant_vectors () and do not advance CHILD_INDEX,
2960          so that the same child is checked against the next operand.  */
2961       vectorized_defs = false;
2962       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2963         {
2964           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2965
2966           /* We have to check both pattern and original def, if available.  */
2967           if (child)
2968             {
2969               gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2970               gimple related
2971                 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2972
2973               if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2974                   || (related
2975                       && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2976                 {
2977                   /* The number of vector defs is determined by the number of
2978                      vector statements in the node from which we get those
2979                      statements.  */
2980                   number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2981                   vectorized_defs = true;
2982                   child_index++;
2983                 }
2984             }
2985           else
2986             child_index++;
2987         }
2988
2989       if (!vectorized_defs)
2990         {
2991           if (i == 0)
2992             {
2993               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2994               /* Number of vector stmts was calculated according to LHS in
2995                  vect_schedule_slp_instance (), fix it by replacing LHS with
2996                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2997                  details.  */
2998               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2999                                              &rhs_size_unit);
3000               if (rhs_size_unit != lhs_size_unit)
3001                 {
3002                   number_of_vects *= rhs_size_unit;
3003                   number_of_vects /= lhs_size_unit;
3004                 }
3005             }
3006         }
3007
3008       /* Allocate memory for vectorized defs.  */
3009       vec_defs = vNULL;
3010       vec_defs.create (number_of_vects);
3011
3012       /* For reduction defs we call vect_get_constant_vectors (), since we are
3013          looking for initial loop invariant values.  */
3014       if (vectorized_defs && reduc_index == -1)
3015         /* The defs are already vectorized.  */
3016         vect_get_slp_vect_defs (child, &vec_defs);
3017       else
3018         /* Build vectors from scalar defs.  */
3019         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3020                                    number_of_vects, reduc_index);
3021
3022       vec_oprnds->quick_push (vec_defs);
3023
3024       /* For reductions, we only need initial values.  */
3025       if (reduc_index != -1)
3026         return;
3027     }
3028 }
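
/* Worked illustration (hypothetical numbers) of the LHS/RHS adjustment
   above: if a group's 16 int results need 4 V4SI vector stmts but the
   operands are chars (LHS_SIZE_UNIT == 4, RHS_SIZE_UNIT == 1), a single
   16-byte vector already holds all 16 char operands, so NUMBER_OF_VECTS
   becomes 4 * 1 / 4 == 1.  */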
3029
3030
3031 /* Create NCOPIES permutation statements using the vector permute mask MASK
3032    and two input vectors placed in DR_CHAIN at FIRST_VEC_INDX and
3033    SECOND_VEC_INDX for the first copy, shifting both indexes by STRIDE
3034    elements of DR_CHAIN for every following copy.
3035    (STRIDE is the number of vectorized stmts for NODE divided by the number
3036    of copies.)
3037    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE at
3038    which the created stmts must be stored.  */
3039
3040 static inline void
3041 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
3042                            tree mask, int first_vec_indx, int second_vec_indx,
3043                            gimple_stmt_iterator *gsi, slp_tree node,
3044                            tree vectype, vec<tree> dr_chain,
3045                            int ncopies, int vect_stmts_counter)
3046 {
3047   tree perm_dest;
3048   gimple perm_stmt = NULL;
3049   stmt_vec_info next_stmt_info;
3050   int i, stride;
3051   tree first_vec, second_vec, data_ref;
3052
3053   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3054
3055   /* Initialize the vect stmts of NODE to properly insert the generated
3056      stmts later.  */
3057   for (i = SLP_TREE_VEC_STMTS (node).length ();
3058        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3059     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3060
3061   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3062   for (i = 0; i < ncopies; i++)
3063     {
3064       first_vec = dr_chain[first_vec_indx];
3065       second_vec = dr_chain[second_vec_indx];
3066
3067       /* Generate the permute statement.  */
3068       perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3069                                        first_vec, second_vec, mask);
3070       data_ref = make_ssa_name (perm_dest, perm_stmt);
3071       gimple_set_lhs (perm_stmt, data_ref);
3072       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3073
3074       /* Store the vector statement in NODE.  */
3075       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
3076
3077       first_vec_indx += stride;
3078       second_vec_indx += stride;
3079     }
3080
3081   /* Mark the scalar stmt as vectorized.  */
3082   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
3083   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
3084 }
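
/* Worked illustration (hypothetical numbers) for the routine above: with
   SLP_TREE_NUMBER_OF_VEC_STMTS == 4 and NCOPIES == 2, STRIDE == 2; copy 0
   permutes DR_CHAIN[FIRST_VEC_INDX] and DR_CHAIN[SECOND_VEC_INDX], copy 1
   permutes DR_CHAIN[FIRST_VEC_INDX + 2] and DR_CHAIN[SECOND_VEC_INDX + 2],
   and the results are stored in the node's vector stmts at slots
   VECT_STMTS_COUNTER and VECT_STMTS_COUNTER + 2.  */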
3085
3086
3087 /* Given FIRST_MASK_ELEMENT, the mask element in element representation,
3088    return in CURRENT_MASK_ELEMENT its equivalent in target-specific
3089    representation.  Check that the mask is valid and return FALSE if not.
3090    Set NEED_NEXT_VECTOR to TRUE if the permutation requires moving on to
3091    the next vector, i.e., the current first vector is no longer needed.  */
3092
3093 static bool
3094 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
3095                        int mask_nunits, bool only_one_vec, int index,
3096                        unsigned char *mask, int *current_mask_element,
3097                        bool *need_next_vector, int *number_of_mask_fixes,
3098                        bool *mask_fixed, bool *needs_first_vector)
3099 {
3100   int i;
3101
3102   /* Convert to target specific representation.  */
3103   *current_mask_element = first_mask_element + m;
3104   /* Adjust the value if it is a mask for the second and third vectors.  */
3105   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
3106
3107   if (*current_mask_element < 0)
3108     {
3109       if (dump_enabled_p ())
3110         {
3111           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3112                            "permutation requires past vector ");
3113           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3114           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3115         }
3116       return false;
3117     }
3118
3119   if (*current_mask_element < mask_nunits)
3120     *needs_first_vector = true;
3121
3122   /* We have only one input vector to permute but the mask accesses values in
3123      the next vector as well.  */
3124   if (only_one_vec && *current_mask_element >= mask_nunits)
3125     {
3126       if (dump_enabled_p ())
3127         {
3128           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3129                            "permutation requires at least two vectors ");
3130           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3131           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3132         }
3133
3134       return false;
3135     }
3136
3137   /* The mask requires the next vector.  */
3138   while (*current_mask_element >= mask_nunits * 2)
3139     {
3140       if (*needs_first_vector || *mask_fixed)
3141         {
3142           /* We either need the first vector too or have already moved to the
3143              next vector. In both cases, this permutation needs three
3144              vectors.  */
3145           if (dump_enabled_p ())
3146             {
3147               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3148                                "permutation requires at "
3149                                "least three vectors ");
3150               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3151               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3152             }
3153
3154           return false;
3155         }
3156
3157       /* We move to the next vector, dropping the first one and working with
3158          the second and the third - we need to adjust the values of the mask
3159          accordingly.  */
3160       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
3161
3162       for (i = 0; i < index; i++)
3163         mask[i] -= mask_nunits * *number_of_mask_fixes;
3164
3165       (*number_of_mask_fixes)++;
3166       *mask_fixed = true;
3167     }
3168
3169   *need_next_vector = *mask_fixed;
3170
3171   /* This was the last element of this mask. Start a new one.  */
3172   if (index == mask_nunits - 1)
3173     {
3174       *number_of_mask_fixes = 1;
3175       *mask_fixed = false;
3176       *needs_first_vector = false;
3177     }
3178
3179   return true;
3180 }
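
/* Worked illustration for the routine above, using the {6,9,9,9} mask from
   the example comment further below (MASK_NUNITS == 4): element 6 is legal
   as is; element 9 is >= 2 * 4, so the first vector is dropped, the pending
   element 6 is rewritten to 2, and each 9 becomes 5, yielding the legal
   mask {2,5,5,5} with NEED_NEXT_VECTOR set for the caller.  */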
3181
3182
3183 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3184    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3185    permute statements for the SLP node NODE of the SLP instance
3186    SLP_NODE_INSTANCE.  */
3187
3188 bool
3189 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3190                               gimple_stmt_iterator *gsi, int vf,
3191                               slp_instance slp_node_instance, bool analyze_only)
3192 {
3193   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3194   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3195   tree mask_element_type = NULL_TREE, mask_type;
3196   int i, j, k, nunits, vec_index = 0, scalar_index;
3197   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3198   gimple next_scalar_stmt;
3199   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3200   int first_mask_element;
3201   int index, unroll_factor, current_mask_element, ncopies;
3202   unsigned char *mask;
3203   bool only_one_vec = false, need_next_vector = false;
3204   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
3205   int number_of_mask_fixes = 1;
3206   bool mask_fixed = false;
3207   bool needs_first_vector = false;
3208   machine_mode mode;
3209
3210   mode = TYPE_MODE (vectype);
3211
3212   if (!can_vec_perm_p (mode, false, NULL))
3213     {
3214       if (dump_enabled_p ())
3215         {
3216           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3217                            "no vect permute for ");
3218           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3219           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3220         }
3221       return false;
3222     }
3223
3224   /* The generic VEC_PERM_EXPR code always uses an integral type of the
3225      same size as the vector element being permuted.  */
3226   mask_element_type = lang_hooks.types.type_for_mode
3227                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3228   mask_type = get_vectype_for_scalar_type (mask_element_type);
3229   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3230   mask = XALLOCAVEC (unsigned char, nunits);
3231   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3232
3233   /* The number of vector stmts to generate, based only on the
3234      SLP_NODE_INSTANCE unrolling factor.  */
3235   orig_vec_stmts_num
3236     = group_size * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
3237   if (orig_vec_stmts_num == 1)
3238     only_one_vec = true;
3239
3240   /* The number of copies is determined by the final vectorization factor
3241      relative to the SLP_NODE_INSTANCE unrolling factor.  */
3242   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3243
3244   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3245     return false;
3246
3247   stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3248
3249   /* Generate permutation masks for every NODE.  The number of masks for
3250      each NODE is equal to GROUP_SIZE.
3251      E.g., we have a group of three nodes with three loads from the same
3252      location in each node, and the vector size is 4.  I.e., we have an
3253      a0b0c0a1b1c1... sequence and we need to create the following vectors:
3254      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3255      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3256      ...
3257
3258      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3259      The last mask is illegal since the permute operation takes only two
3260      input vectors, so mask element values must be less than twice the
3261      vector length.  Hence, the last mask must be converted into {2,5,5,5}.
3262      For the first two permutations we need the first and the second input
3263      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3264      we need the second and the third vectors: {b1,c1,a2,b2} and
3265      {c2,a3,b3,c3}.  */
3266
3267     {
3268       scalar_index = 0;
3269       index = 0;
3270       vect_stmts_counter = 0;
3271       vec_index = 0;
3272       first_vec_index = vec_index++;
3273       if (only_one_vec)
3274         second_vec_index = first_vec_index;
3275       else
3276         second_vec_index = vec_index++;
3277
3278       for (j = 0; j < unroll_factor; j++)
3279         {
3280           for (k = 0; k < group_size; k++)
3281             {
3282               i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3283               first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
3284               if (!vect_get_mask_element (stmt, first_mask_element, 0,
3285                                           nunits, only_one_vec, index,
3286                                           mask, &current_mask_element,
3287                                           &need_next_vector,
3288                                           &number_of_mask_fixes, &mask_fixed,
3289                                           &needs_first_vector))
3290                 return false;
3291               gcc_assert (current_mask_element >= 0
3292                           && current_mask_element < 2 * nunits);
3293               mask[index++] = current_mask_element;
3294
3295               if (index == nunits)
3296                 {
3297                   index = 0;
3298                   if (!can_vec_perm_p (mode, false, mask))
3299                     {
3300                       if (dump_enabled_p ())
3301                         {
3302                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3303                                            vect_location,
3304                                            "unsupported vect permute { ");
3305                           for (i = 0; i < nunits; ++i)
3306                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3307                                          mask[i]);
3308                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3309                         }
3310                       return false;
3311                     }
3312
3313                   if (!analyze_only)
3314                     {
3315                       int l;
3316                       tree mask_vec, *mask_elts;
3317                       mask_elts = XALLOCAVEC (tree, nunits);
3318                       for (l = 0; l < nunits; ++l)
3319                         mask_elts[l] = build_int_cst (mask_element_type,
3320                                                       mask[l]);
3321                       mask_vec = build_vector (mask_type, mask_elts);
3322
3323                       if (need_next_vector)
3324                         {
3325                           first_vec_index = second_vec_index;
3326                           second_vec_index = vec_index;
3327                         }
3328
3329                       next_scalar_stmt
3330                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3331
3332                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
3333                                mask_vec, first_vec_index, second_vec_index,
3334                                gsi, node, vectype, dr_chain,
3335                                ncopies, vect_stmts_counter++);
3336                     }
3337                 }
3338             }
3339         }
3340     }
3341
3342   return true;
3343 }
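
#if 0
/* Standalone sketch (not GCC code) that reproduces the mask computation
   for the a's in the example comment above: GROUP_SIZE == 3, NUNITS == 4,
   UNROLL_FACTOR == 4 and load-permutation index 0 for every scalar stmt.
   It is a simplified model of vect_get_mask_element; running it prints
   {0,0,0,3} and {3,3,6,6} on vectors 0 and 1, then {2,5,5,5} on vectors
   1 and 2, matching the comment.  */
#include <stdio.h>

int
main (void)
{
  const int group_size = 3, nunits = 4, unroll_factor = 4;
  int mask[4];
  int index = 0, fixes = 1;           /* number_of_mask_fixes  */
  int first_vec = 0, second_vec = 1;  /* indexes into dr_chain  */
  int next_vec = 2;
  int j, k, l;

  for (j = 0; j < unroll_factor; j++)
    for (k = 0; k < group_size; k++)
      {
        int elt = 0 /* load-permutation index */ + j * group_size;

        /* Renormalize: a mask may address only two input vectors.  */
        elt -= nunits * (fixes - 1);
        if (elt >= 2 * nunits)
          {
            elt -= nunits * fixes;
            for (l = 0; l < index; l++)
              mask[l] -= nunits * fixes;
            fixes++;
            /* Drop the first vector and take the following one.  */
            first_vec = second_vec;
            second_vec = next_vec++;
          }
        mask[index++] = elt;

        if (index == nunits)
          {
            printf ("{%d,%d,%d,%d} on vectors %d and %d\n",
                    mask[0], mask[1], mask[2], mask[3],
                    first_vec, second_vec);
            index = 0;
            fixes = 1;
          }
      }
  return 0;
}
#endif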
3344
3345
3346
3347 /* Vectorize SLP instance tree in postorder.  */
3348
3349 static bool
3350 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3351                             unsigned int vectorization_factor)
3352 {
3353   gimple stmt;
3354   bool grouped_store, is_store;
3355   gimple_stmt_iterator si;
3356   stmt_vec_info stmt_info;
3357   unsigned int vec_stmts_size, nunits, group_size;
3358   tree vectype;
3359   int i;
3360   slp_tree child;
3361
3362   if (!node)
3363     return false;
3364
3365   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3366     vect_schedule_slp_instance (child, instance, vectorization_factor);
3367
3368   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3369   stmt_info = vinfo_for_stmt (stmt);
3370
3371   /* VECTYPE is the type of the destination.  */
3372   vectype = STMT_VINFO_VECTYPE (stmt_info);
3373   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3374   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3375
3376   /* For each SLP instance calculate the number of vector stmts to be
3377      created for the scalar stmts in each node of the SLP tree.  The number
3378      of vector elements in one vector iteration is the number of scalar
3379      elements in one scalar iteration (GROUP_SIZE) multiplied by VF divided
3380      by the vector size.
3381      Unless this is an SLP reduction, in which case the number of vector
3382      stmts is equal to the number of vector stmts of the children.  */
3383   if (GROUP_FIRST_ELEMENT (stmt_info)
3384       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3385     vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3386   else
3387     vec_stmts_size = (vectorization_factor * group_size) / nunits;
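
  /* Illustration (hypothetical numbers): with GROUP_SIZE == 2, a
     vectorization factor of 4 and V4SI vectors (NUNITS == 4), one vector
     covers two scalar iterations, so VEC_STMTS_SIZE == 4 * 2 / 4 == 2
     vector stmts are created for the node.  */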
3388
3389   if (!SLP_TREE_VEC_STMTS (node).exists ())
3390     {
3391       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3392       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3393     }
3394
3395   if (dump_enabled_p ())
3396     {
3397       dump_printf_loc (MSG_NOTE, vect_location,
3398                        "------>vectorizing SLP node starting from: ");
3399       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3400       dump_printf (MSG_NOTE, "\n");
3401     }
3402
3403   /* Vectorized stmts go before the last scalar stmt, which is where
3404      all the uses are ready.  */
3405   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3406
3407   /* Mark the first element of the reduction chain as reduction to properly
3408      transform the node.  In the analysis phase only the last element of the
3409      chain is marked as reduction.  */
3410   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3411       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3412     {
3413       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3414       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3415     }
3416
3417   /* Handle two-operation SLP nodes by vectorizing the group with
3418      both operations and then performing a merge.  */
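  /* For example (illustration only), a two-operator group such as
         a[0] = b[0] + c[0];
         a[1] = b[1] - c[1];
     is vectorized once as a vector addition and once as a vector
     subtraction, and the two results are then blended by a VEC_PERM_EXPR
     whose constant mask selects the even elements from the addition and
     the odd elements from the subtraction.  */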
3419   if (SLP_TREE_TWO_OPERATORS (node))
3420     {
3421       enum tree_code code0 = gimple_assign_rhs_code (stmt);
3422       enum tree_code ocode = ERROR_MARK;
3423       gimple ostmt;
3424       unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3425       bool allsame = true;
3426       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3427         if (gimple_assign_rhs_code (ostmt) != code0)
3428           {
3429             mask[i] = 1;
3430             allsame = false;
3431             ocode = gimple_assign_rhs_code (ostmt);
3432           }
3433         else
3434           mask[i] = 0;
3435       if (!allsame)
3436         {
3437           vec<gimple> v0;
3438           vec<gimple> v1;
3439           unsigned j;
3440           tree tmask = NULL_TREE;
3441           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3442           v0 = SLP_TREE_VEC_STMTS (node).copy ();
3443           SLP_TREE_VEC_STMTS (node).truncate (0);
3444           gimple_assign_set_rhs_code (stmt, ocode);
3445           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3446           gimple_assign_set_rhs_code (stmt, code0);
3447           v1 = SLP_TREE_VEC_STMTS (node).copy ();
3448           SLP_TREE_VEC_STMTS (node).truncate (0);
3449           tree meltype = build_nonstandard_integer_type
3450               (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3451           tree mvectype = get_same_sized_vectype (meltype, vectype);
3452           unsigned k = 0, l;
3453           for (j = 0; j < v0.length (); ++j)
3454             {
3455               tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3456               for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3457                 {
3458                   if (k >= group_size)
3459                     k = 0;
3460                   melts[l] = build_int_cst
3461                       (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3462                 }
3463               tmask = build_vector (mvectype, melts);
3464
3465               /* ???  Not all targets support a VEC_PERM_EXPR with a
3466                  constant mask that would translate to a vec_merge RTX
3467                  (with their vec_perm_const_ok).  We can either not
3468                  vectorize in that case or let veclower do its job.
3469                  Unfortunately that isn't too great and at least for
3470                  plus/minus we'd eventually like to match the target's
3471                  vector addsub instructions.  */
3472               gimple vstmt;
3473               vstmt = gimple_build_assign (make_ssa_name (vectype),
3474                                            VEC_PERM_EXPR,
3475                                            gimple_assign_lhs (v0[j]),
3476                                            gimple_assign_lhs (v1[j]), tmask);
3477               vect_finish_stmt_generation (stmt, vstmt, &si);
3478               SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3479             }
3480           v0.release ();
3481           v1.release ();
3482           return false;
3483         }
3484     }
3485   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3486   return is_store;
3487 }
3488
3489 /* Replace the scalar calls in SLP node NODE with assignments of zero to
3490    their lhs.  For loop vectorization this is done in vectorizable_call,
3491    but for SLP it needs to be deferred until the end of vect_schedule_slp,
3492    because multiple SLP instances may refer to the same scalar stmt.  */
3493
3494 static void
3495 vect_remove_slp_scalar_calls (slp_tree node)
3496 {
3497   gimple stmt, new_stmt;
3498   gimple_stmt_iterator gsi;
3499   int i;
3500   slp_tree child;
3501   tree lhs;
3502   stmt_vec_info stmt_info;
3503
3504   if (!node)
3505     return;
3506
3507   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3508     vect_remove_slp_scalar_calls (child);
3509
3510   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3511     {
3512       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3513         continue;
3514       stmt_info = vinfo_for_stmt (stmt);
3515       if (stmt_info == NULL
3516           || is_pattern_stmt_p (stmt_info)
3517           || !PURE_SLP_STMT (stmt_info))
3518         continue;
3519       lhs = gimple_call_lhs (stmt);
3520       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3521       set_vinfo_for_stmt (new_stmt, stmt_info);
3522       set_vinfo_for_stmt (stmt, NULL);
3523       STMT_VINFO_STMT (stmt_info) = new_stmt;
3524       gsi = gsi_for_stmt (stmt);
3525       gsi_replace (&gsi, new_stmt, false);
3526       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3527     }
3528 }
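
/* E.g. (illustration), a scalar call in an SLP node such as
       x_1 = foo (a_2);
   is rewritten to
       x_1 = 0;
   so the IL stays valid while the now-dead scalar code awaits removal.  */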
3529
3530 /* Generate vector code for all SLP instances in the loop/basic block.  */
3531
3532 bool
3533 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3534 {
3535   vec<slp_instance> slp_instances;
3536   slp_instance instance;
3537   unsigned int i, vf;
3538   bool is_store = false;
3539
3540   if (loop_vinfo)
3541     {
3542       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3543       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3544     }
3545   else
3546     {
3547       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3548       vf = 1;
3549     }
3550
3551   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3552     {
3553       /* Schedule the tree of INSTANCE.  */
3554       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3555                                              instance, vf);
3556       if (dump_enabled_p ())
3557         dump_printf_loc (MSG_NOTE, vect_location,
3558                          "vectorizing stmts using SLP.\n");
3559     }
3560
3561   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3562     {
3563       slp_tree root = SLP_INSTANCE_TREE (instance);
3564       gimple store;
3565       unsigned int j;
3566       gimple_stmt_iterator gsi;
3567
3568       /* Remove scalar call stmts.  Do not do this for basic-block
3569          vectorization as not all uses may be vectorized.
3570          ???  Why should this be necessary?  DCE should be able to
3571          remove the stmts itself.
3572          ???  For BB vectorization we can as well remove scalar
3573          stmts starting from the SLP tree root if they have no
3574          uses.  */
3575       if (loop_vinfo)
3576         vect_remove_slp_scalar_calls (root);
3577
3578       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3579                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3580         {
3581           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3582             break;
3583
3584           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3585             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3586           /* Free the attached stmt_vec_info and remove the stmt.  */
3587           gsi = gsi_for_stmt (store);
3588           unlink_stmt_vdef (store);
3589           gsi_remove (&gsi, true);
3590           release_defs (store);
3591           free_stmt_vec_info (store);
3592         }
3593     }
3594
3595   return is_store;
3596 }
3597
3598
3599 /* Vectorize the basic block.  */
3600
3601 void
3602 vect_slp_transform_bb (basic_block bb)
3603 {
3604   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3605   gimple_stmt_iterator si;
3606
3607   gcc_assert (bb_vinfo);
3608
3609   if (dump_enabled_p ())
3610     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3611
3612   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3613     {
3614       gimple stmt = gsi_stmt (si);
3615       stmt_vec_info stmt_info;
3616
3617       if (dump_enabled_p ())
3618         {
3619           dump_printf_loc (MSG_NOTE, vect_location,
3620                            "------>SLPing statement: ");
3621           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3622           dump_printf (MSG_NOTE, "\n");
3623         }
3624
3625       stmt_info = vinfo_for_stmt (stmt);
3626       gcc_assert (stmt_info);
3627
3628       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3629       if (STMT_SLP_TYPE (stmt_info))
3630         {
3631           vect_schedule_slp (NULL, bb_vinfo);
3632           break;
3633         }
3634     }
3635
3636   if (dump_enabled_p ())
3637     dump_printf_loc (MSG_NOTE, vect_location,
3638                      "BASIC BLOCK VECTORIZED\n");
3639
3640   destroy_bb_vec_info (bb_vinfo);
3641 }