coretypes.h: Include input.h and as-a.h.
[platform/upstream/gcc.git] / gcc / tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2    Copyright (C) 2007-2015 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "tree.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
32 #include "target.h"
33 #include "predict.h"
34 #include "hard-reg-set.h"
35 #include "function.h"
36 #include "basic-block.h"
37 #include "gimple-pretty-print.h"
38 #include "tree-ssa-alias.h"
39 #include "internal-fn.h"
40 #include "gimple-expr.h"
41 #include "gimple.h"
42 #include "gimple-iterator.h"
43 #include "gimple-ssa.h"
44 #include "tree-phinodes.h"
45 #include "ssa-iterators.h"
46 #include "stringpool.h"
47 #include "tree-ssanames.h"
48 #include "tree-pass.h"
49 #include "cfgloop.h"
50 #include "rtl.h"
51 #include "flags.h"
52 #include "insn-config.h"
53 #include "expmed.h"
54 #include "dojump.h"
55 #include "explow.h"
56 #include "calls.h"
57 #include "emit-rtl.h"
58 #include "varasm.h"
59 #include "stmt.h"
60 #include "expr.h"
61 #include "recog.h"              /* FIXME: for insn_data */
62 #include "insn-codes.h"
63 #include "optabs.h"
64 #include "tree-vectorizer.h"
65 #include "langhooks.h"
66 #include "gimple-walk.h"
67
68 /* Extract the location of the basic block in the source code.
69    Return the basic block location if succeed and NULL if not.  */
70
71 source_location
72 find_bb_location (basic_block bb)
73 {
74   gimple stmt = NULL;
75   gimple_stmt_iterator si;
76
77   if (!bb)
78     return UNKNOWN_LOCATION;
79
80   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
81     {
82       stmt = gsi_stmt (si);
83       if (gimple_location (stmt) != UNKNOWN_LOCATION)
84         return gimple_location (stmt);
85     }
86
87   return UNKNOWN_LOCATION;
88 }
89
90
91 /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
92
93 static void
94 vect_free_slp_tree (slp_tree node)
95 {
96   int i;
97   slp_tree child;
98
99   if (!node)
100     return;
101
102   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
103     vect_free_slp_tree (child);
104
105   SLP_TREE_CHILDREN (node).release ();
106   SLP_TREE_SCALAR_STMTS (node).release ();
107   SLP_TREE_VEC_STMTS (node).release ();
108   SLP_TREE_LOAD_PERMUTATION (node).release ();
109
110   free (node);
111 }
112
113
114 /* Free the memory allocated for the SLP instance.  */
115
116 void
117 vect_free_slp_instance (slp_instance instance)
118 {
119   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
120   SLP_INSTANCE_LOADS (instance).release ();
121   free (instance);
122 }
123
124
125 /* Create an SLP node for SCALAR_STMTS.  */
126
127 static slp_tree
128 vect_create_new_slp_node (vec<gimple> scalar_stmts)
129 {
130   slp_tree node;
131   gimple stmt = scalar_stmts[0];
132   unsigned int nops;
133
134   if (is_gimple_call (stmt))
135     nops = gimple_call_num_args (stmt);
136   else if (is_gimple_assign (stmt))
137     {
138       nops = gimple_num_ops (stmt) - 1;
139       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
140         nops++;
141     }
142   else
143     return NULL;
144
145   node = XNEW (struct _slp_tree);
146   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
147   SLP_TREE_VEC_STMTS (node).create (0);
148   SLP_TREE_CHILDREN (node).create (nops);
149   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
150   SLP_TREE_TWO_OPERATORS (node) = false;
151
152   return node;
153 }
154
155
156 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
157    operand.  */
158 static vec<slp_oprnd_info> 
159 vect_create_oprnd_info (int nops, int group_size)
160 {
161   int i;
162   slp_oprnd_info oprnd_info;
163   vec<slp_oprnd_info> oprnds_info;
164
165   oprnds_info.create (nops);
166   for (i = 0; i < nops; i++)
167     {
168       oprnd_info = XNEW (struct _slp_oprnd_info);
169       oprnd_info->def_stmts.create (group_size);
170       oprnd_info->first_dt = vect_uninitialized_def;
171       oprnd_info->first_op_type = NULL_TREE;
172       oprnd_info->first_pattern = false;
173       oprnd_info->second_pattern = false;
174       oprnds_info.quick_push (oprnd_info);
175     }
176
177   return oprnds_info;
178 }
179
180
181 /* Free operands info.  */
182
183 static void
184 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
185 {
186   int i;
187   slp_oprnd_info oprnd_info;
188
189   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
190     {
191       oprnd_info->def_stmts.release ();
192       XDELETE (oprnd_info);
193     }
194
195   oprnds_info.release ();
196 }
197
198
199 /* Find the place of the data-ref in STMT in the interleaving chain that starts
200    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
201
202 static int
203 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
204 {
205   gimple next_stmt = first_stmt;
206   int result = 0;
207
208   if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
209     return -1;
210
211   do
212     {
213       if (next_stmt == stmt)
214         return result;
215       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
216       if (next_stmt)
217         result += GROUP_GAP (vinfo_for_stmt (next_stmt));
218     }
219   while (next_stmt);
220
221   return -1;
222 }
223
224
225 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
226    they are of a valid type and that they match the defs of the first stmt of
227    the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
228    return -1, if the error could be corrected by swapping operands of the
229    operation return 1, if everything is ok return 0.  */
230
231 static int 
232 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
233                              gimple stmt, unsigned stmt_num,
234                              vec<slp_oprnd_info> *oprnds_info)
235 {
236   tree oprnd;
237   unsigned int i, number_of_oprnds;
238   tree def;
239   gimple def_stmt;
240   enum vect_def_type dt = vect_uninitialized_def;
241   struct loop *loop = NULL;
242   bool pattern = false;
243   slp_oprnd_info oprnd_info;
244   int first_op_idx = 1;
245   bool commutative = false;
246   bool first_op_cond = false;
247   bool first = stmt_num == 0;
248   bool second = stmt_num == 1;
249
250   if (loop_vinfo)
251     loop = LOOP_VINFO_LOOP (loop_vinfo);
252
253   if (is_gimple_call (stmt))
254     {
255       number_of_oprnds = gimple_call_num_args (stmt);
256       first_op_idx = 3;
257     }
258   else if (is_gimple_assign (stmt))
259     {
260       enum tree_code code = gimple_assign_rhs_code (stmt);
261       number_of_oprnds = gimple_num_ops (stmt) - 1;
262       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
263         {
264           first_op_cond = true;
265           commutative = true;
266           number_of_oprnds++;
267         }
268       else
269         commutative = commutative_tree_code (code);
270     }
271   else
272     return -1;
273
274   bool swapped = false;
275   for (i = 0; i < number_of_oprnds; i++)
276     {
277 again:
278       if (first_op_cond)
279         {
280           if (i == 0 || i == 1)
281             oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
282                                   swapped ? !i : i);
283           else
284             oprnd = gimple_op (stmt, first_op_idx + i - 1);
285         }
286       else
287         oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
288
289       oprnd_info = (*oprnds_info)[i];
290
291       if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
292                                &def, &dt))
293         {
294           if (dump_enabled_p ())
295             {
296               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
297                                "Build SLP failed: can't analyze def for ");
298               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
299               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
300             }
301
302           return -1;
303         }
304
305       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
306          from the pattern.  Check that all the stmts of the node are in the
307          pattern.  */
308       if (def_stmt && gimple_bb (def_stmt)
309           && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
310               || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
311                   && gimple_code (def_stmt) != GIMPLE_PHI))
312           && vinfo_for_stmt (def_stmt)
313           && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
314           && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
315           && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
316         {
317           pattern = true;
318           if (!first && !oprnd_info->first_pattern
319               /* Allow different pattern state for the defs of the
320                  first stmt in reduction chains.  */
321               && (oprnd_info->first_dt != vect_reduction_def
322                   || (!second && !oprnd_info->second_pattern)))
323             {
324               if (i == 0
325                   && !swapped
326                   && commutative)
327                 {
328                   swapped = true;
329                   goto again;
330                 }
331
332               if (dump_enabled_p ())
333                 {
334                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
335                                    "Build SLP failed: some of the stmts"
336                                    " are in a pattern, and others are not ");
337                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
338                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
339                 }
340
341               return 1;
342             }
343
344           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
345           dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
346
347           if (dt == vect_unknown_def_type)
348             {
349               if (dump_enabled_p ())
350                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
351                                  "Unsupported pattern.\n");
352               return -1;
353             }
354
355           switch (gimple_code (def_stmt))
356             {
357               case GIMPLE_PHI:
358                 def = gimple_phi_result (def_stmt);
359                 break;
360
361               case GIMPLE_ASSIGN:
362                 def = gimple_assign_lhs (def_stmt);
363                 break;
364
365               default:
366                 if (dump_enabled_p ())
367                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
368                                    "unsupported defining stmt:\n");
369                 return -1;
370             }
371         }
372
373       if (second)
374         oprnd_info->second_pattern = pattern;
375
376       if (first)
377         {
378           oprnd_info->first_dt = dt;
379           oprnd_info->first_pattern = pattern;
380           oprnd_info->first_op_type = TREE_TYPE (oprnd);
381         }
382       else
383         {
384           /* Not first stmt of the group, check that the def-stmt/s match
385              the def-stmt/s of the first stmt.  Allow different definition
386              types for reduction chains: the first stmt must be a
387              vect_reduction_def (a phi node), and the rest
388              vect_internal_def.  */
389           if (((oprnd_info->first_dt != dt
390                 && !(oprnd_info->first_dt == vect_reduction_def
391                      && dt == vect_internal_def)
392                 && !((oprnd_info->first_dt == vect_external_def
393                       || oprnd_info->first_dt == vect_constant_def)
394                      && (dt == vect_external_def
395                          || dt == vect_constant_def)))
396                || !types_compatible_p (oprnd_info->first_op_type,
397                                        TREE_TYPE (oprnd))))
398             {
399               /* Try swapping operands if we got a mismatch.  */
400               if (i == 0
401                   && !swapped
402                   && commutative)
403                 {
404                   swapped = true;
405                   goto again;
406                 }
407
408               if (dump_enabled_p ())
409                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
410                                  "Build SLP failed: different types\n");
411
412               return 1;
413             }
414         }
415
416       /* Check the types of the definitions.  */
417       switch (dt)
418         {
419         case vect_constant_def:
420         case vect_external_def:
421         case vect_reduction_def:
422           break;
423
424         case vect_internal_def:
425           oprnd_info->def_stmts.quick_push (def_stmt);
426           break;
427
428         default:
429           /* FORNOW: Not supported.  */
430           if (dump_enabled_p ())
431             {
432               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
433                                "Build SLP failed: illegal type of def ");
434               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
435               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
436             }
437
438           return -1;
439         }
440     }
441
442   /* Swap operands.  */
443   if (swapped)
444     {
445       if (first_op_cond)
446         {
447           tree cond = gimple_assign_rhs1 (stmt);
448           swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
449                              &TREE_OPERAND (cond, 1));
450           TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
451         }
452       else
453         swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
454                            gimple_assign_rhs2_ptr (stmt));
455     }
456
457   return 0;
458 }
459
460
461 /* Verify if the scalar stmts STMTS are isomorphic, require data
462    permutation or are of unsupported types of operation.  Return
463    true if they are, otherwise return false and indicate in *MATCHES
464    which stmts are not isomorphic to the first one.  If MATCHES[0]
465    is false then this indicates the comparison could not be
466    carried out or the stmts will never be vectorized by SLP.  */
467
468 static bool
469 vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
470                        vec<gimple> stmts, unsigned int group_size,
471                        unsigned nops, unsigned int *max_nunits,
472                        unsigned int vectorization_factor, bool *matches,
473                        bool *two_operators)
474 {
475   unsigned int i;
476   gimple first_stmt = stmts[0], stmt = stmts[0];
477   enum tree_code first_stmt_code = ERROR_MARK;
478   enum tree_code alt_stmt_code = ERROR_MARK;
479   enum tree_code rhs_code = ERROR_MARK;
480   enum tree_code first_cond_code = ERROR_MARK;
481   tree lhs;
482   bool need_same_oprnds = false;
483   tree vectype, scalar_type, first_op1 = NULL_TREE;
484   optab optab;
485   int icode;
486   machine_mode optab_op2_mode;
487   machine_mode vec_mode;
488   struct data_reference *first_dr;
489   HOST_WIDE_INT dummy;
490   gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
491   tree cond;
492
493   /* For every stmt in NODE find its def stmt/s.  */
494   FOR_EACH_VEC_ELT (stmts, i, stmt)
495     {
496       matches[i] = false;
497
498       if (dump_enabled_p ())
499         {
500           dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
501           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
502           dump_printf (MSG_NOTE, "\n");
503         }
504
505       /* Fail to vectorize statements marked as unvectorizable.  */
506       if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
507         {
508           if (dump_enabled_p ())
509             {
510               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
511                                "Build SLP failed: unvectorizable statement ");
512               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
513               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
514             }
515           /* Fatal mismatch.  */
516           matches[0] = false;
517           return false;
518         }
519
520       lhs = gimple_get_lhs (stmt);
521       if (lhs == NULL_TREE)
522         {
523           if (dump_enabled_p ())
524             {
525               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
526                                "Build SLP failed: not GIMPLE_ASSIGN nor "
527                                "GIMPLE_CALL ");
528               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
529               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
530             }
531           /* Fatal mismatch.  */
532           matches[0] = false;
533           return false;
534         }
535
536        if (is_gimple_assign (stmt)
537            && gimple_assign_rhs_code (stmt) == COND_EXPR
538            && (cond = gimple_assign_rhs1 (stmt))
539            && !COMPARISON_CLASS_P (cond))
540         {
541           if (dump_enabled_p ())
542             {
543               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
544                                "Build SLP failed: condition is not "
545                                "comparison ");
546               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
547               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
548             }
549           /* Fatal mismatch.  */
550           matches[0] = false;
551           return false;
552         }
553
554       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
555       vectype = get_vectype_for_scalar_type (scalar_type);
556       if (!vectype)
557         {
558           if (dump_enabled_p ())
559             {
560               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
561                                "Build SLP failed: unsupported data-type ");
562               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
563                                  scalar_type);
564               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
565             }
566           /* Fatal mismatch.  */
567           matches[0] = false;
568           return false;
569         }
570
571       /* If populating the vector type requires unrolling then fail
572          before adjusting *max_nunits for basic-block vectorization.  */
573       if (bb_vinfo
574           && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
575         {
576           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
577                            "Build SLP failed: unrolling required "
578                            "in basic block SLP\n");
579           /* Fatal mismatch.  */
580           matches[0] = false;
581           return false;
582         }
583
584       /* In case of multiple types we need to detect the smallest type.  */
585       if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
586         {
587           *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
588           if (bb_vinfo)
589             vectorization_factor = *max_nunits;
590         }
591
592       if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
593         {
594           rhs_code = CALL_EXPR;
595           if (gimple_call_internal_p (call_stmt)
596               || gimple_call_tail_p (call_stmt)
597               || gimple_call_noreturn_p (call_stmt)
598               || !gimple_call_nothrow_p (call_stmt)
599               || gimple_call_chain (call_stmt))
600             {
601               if (dump_enabled_p ())
602                 {
603                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
604                                    "Build SLP failed: unsupported call type ");
605                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
606                                     call_stmt, 0);
607                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
608                 }
609               /* Fatal mismatch.  */
610               matches[0] = false;
611               return false;
612             }
613         }
614       else
615         rhs_code = gimple_assign_rhs_code (stmt);
616
617       /* Check the operation.  */
618       if (i == 0)
619         {
620           first_stmt_code = rhs_code;
621
622           /* Shift arguments should be equal in all the packed stmts for a
623              vector shift with scalar shift operand.  */
624           if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
625               || rhs_code == LROTATE_EXPR
626               || rhs_code == RROTATE_EXPR)
627             {
628               vec_mode = TYPE_MODE (vectype);
629
630               /* First see if we have a vector/vector shift.  */
631               optab = optab_for_tree_code (rhs_code, vectype,
632                                            optab_vector);
633
634               if (!optab
635                   || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
636                 {
637                   /* No vector/vector shift, try for a vector/scalar shift.  */
638                   optab = optab_for_tree_code (rhs_code, vectype,
639                                                optab_scalar);
640
641                   if (!optab)
642                     {
643                       if (dump_enabled_p ())
644                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
645                                          "Build SLP failed: no optab.\n");
646                       /* Fatal mismatch.  */
647                       matches[0] = false;
648                       return false;
649                     }
650                   icode = (int) optab_handler (optab, vec_mode);
651                   if (icode == CODE_FOR_nothing)
652                     {
653                       if (dump_enabled_p ())
654                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
655                                          "Build SLP failed: "
656                                          "op not supported by target.\n");
657                       /* Fatal mismatch.  */
658                       matches[0] = false;
659                       return false;
660                     }
661                   optab_op2_mode = insn_data[icode].operand[2].mode;
662                   if (!VECTOR_MODE_P (optab_op2_mode))
663                     {
664                       need_same_oprnds = true;
665                       first_op1 = gimple_assign_rhs2 (stmt);
666                     }
667                 }
668             }
669           else if (rhs_code == WIDEN_LSHIFT_EXPR)
670             {
671               need_same_oprnds = true;
672               first_op1 = gimple_assign_rhs2 (stmt);
673             }
674         }
675       else
676         {
677           if (first_stmt_code != rhs_code
678               && alt_stmt_code == ERROR_MARK)
679             alt_stmt_code = rhs_code;
680           if (first_stmt_code != rhs_code
681               && (first_stmt_code != IMAGPART_EXPR
682                   || rhs_code != REALPART_EXPR)
683               && (first_stmt_code != REALPART_EXPR
684                   || rhs_code != IMAGPART_EXPR)
685               /* Handle mismatches in plus/minus by computing both
686                  and merging the results.  */
687               && !((first_stmt_code == PLUS_EXPR
688                     || first_stmt_code == MINUS_EXPR)
689                    && (alt_stmt_code == PLUS_EXPR
690                        || alt_stmt_code == MINUS_EXPR)
691                    && rhs_code == alt_stmt_code)
692               && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
693                    && (first_stmt_code == ARRAY_REF
694                        || first_stmt_code == BIT_FIELD_REF
695                        || first_stmt_code == INDIRECT_REF
696                        || first_stmt_code == COMPONENT_REF
697                        || first_stmt_code == MEM_REF)))
698             {
699               if (dump_enabled_p ())
700                 {
701                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
702                                    "Build SLP failed: different operation "
703                                    "in stmt ");
704                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
705                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
706                                    "original stmt ");
707                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
708                                     first_stmt, 0);
709                 }
710               /* Mismatch.  */
711               continue;
712             }
713
714           if (need_same_oprnds
715               && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
716             {
717               if (dump_enabled_p ())
718                 {
719                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
720                                    "Build SLP failed: different shift "
721                                    "arguments in ");
722                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
723                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
724                 }
725               /* Mismatch.  */
726               continue;
727             }
728
729           if (rhs_code == CALL_EXPR)
730             {
731               gimple first_stmt = stmts[0];
732               if (gimple_call_num_args (stmt) != nops
733                   || !operand_equal_p (gimple_call_fn (first_stmt),
734                                        gimple_call_fn (stmt), 0)
735                   || gimple_call_fntype (first_stmt)
736                      != gimple_call_fntype (stmt))
737                 {
738                   if (dump_enabled_p ())
739                     {
740                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
741                                        "Build SLP failed: different calls in ");
742                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
743                                         stmt, 0);
744                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
745                     }
746                   /* Mismatch.  */
747                   continue;
748                 }
749             }
750         }
751
752       /* Grouped store or load.  */
753       if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
754         {
755           if (REFERENCE_CLASS_P (lhs))
756             {
757               /* Store.  */
758               ;
759             }
760           else
761             {
762               /* Load.  */
763               /* Check that the size of interleaved loads group is not
764                  greater than the SLP group size.  */
765               unsigned ncopies
766                 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
767               if (loop_vinfo
768                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
769                   && ((GROUP_SIZE (vinfo_for_stmt (stmt))
770                        - GROUP_GAP (vinfo_for_stmt (stmt)))
771                       > ncopies * group_size))
772                 {
773                   if (dump_enabled_p ())
774                     {
775                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
776                                        "Build SLP failed: the number "
777                                        "of interleaved loads is greater than "
778                                        "the SLP group size ");
779                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
780                                         stmt, 0);
781                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
782                     }
783                   /* Fatal mismatch.  */
784                   matches[0] = false;
785                   return false;
786                 }
787
788               old_first_load = first_load;
789               first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
790               if (prev_first_load)
791                 {
792                   /* Check that there are no loads from different interleaving
793                      chains in the same node.  */
794                   if (prev_first_load != first_load)
795                     {
796                       if (dump_enabled_p ())
797                         {
798                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
799                                            vect_location, 
800                                            "Build SLP failed: different "
801                                            "interleaving chains in one node ");
802                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
803                                             stmt, 0);
804                           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
805                         }
806                       /* Mismatch.  */
807                       continue;
808                     }
809                 }
810               else
811                 prev_first_load = first_load;
812
813               /* In some cases a group of loads is just the same load
814                  repeated N times.  Only analyze its cost once.  */
815               if (first_load == stmt && old_first_load != first_load)
816                 {
817                   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
818                   if (vect_supportable_dr_alignment (first_dr, false)
819                       == dr_unaligned_unsupported)
820                     {
821                       if (dump_enabled_p ())
822                         {
823                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
824                                            vect_location, 
825                                            "Build SLP failed: unsupported "
826                                            "unaligned load ");
827                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
828                                             stmt, 0);
829                           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
830                         }
831                       /* Fatal mismatch.  */
832                       matches[0] = false;
833                       return false;
834                     }
835                 }
836            }
837         } /* Grouped access.  */
838       else
839         {
840           if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
841             {
842               /* Not grouped load.  */
843               if (dump_enabled_p ())
844                 {
845                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
846                                    "Build SLP failed: not grouped load ");
847                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
848                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
849                 }
850
851               /* FORNOW: Not grouped loads are not supported.  */
852               /* Fatal mismatch.  */
853               matches[0] = false;
854               return false;
855             }
856
857           /* Not memory operation.  */
858           if (TREE_CODE_CLASS (rhs_code) != tcc_binary
859               && TREE_CODE_CLASS (rhs_code) != tcc_unary
860               && TREE_CODE_CLASS (rhs_code) != tcc_expression
861               && rhs_code != CALL_EXPR)
862             {
863               if (dump_enabled_p ())
864                 {
865                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
866                                    "Build SLP failed: operation");
867                   dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
868                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
869                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
870                 }
871               /* Fatal mismatch.  */
872               matches[0] = false;
873               return false;
874             }
875
876           if (rhs_code == COND_EXPR)
877             {
878               tree cond_expr = gimple_assign_rhs1 (stmt);
879
880               if (i == 0)
881                 first_cond_code = TREE_CODE (cond_expr);
882               else if (first_cond_code != TREE_CODE (cond_expr))
883                 {
884                   if (dump_enabled_p ())
885                     {
886                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
887                                        "Build SLP failed: different"
888                                        " operation");
889                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
890                                         stmt, 0);
891                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
892                     }
893                   /* Mismatch.  */
894                   continue;
895                 }
896             }
897         }
898
899       matches[i] = true;
900     }
901
902   for (i = 0; i < group_size; ++i)
903     if (!matches[i])
904       return false;
905
906   /* If we allowed a two-operation SLP node verify the target can cope
907      with the permute we are going to use.  */
908   if (alt_stmt_code != ERROR_MARK
909       && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
910     {
911       unsigned char *sel
912         = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
913       for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
914         {
915           sel[i] = i;
916           if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
917             sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
918         }
919       if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
920         {
921           for (i = 0; i < group_size; ++i)
922             if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
923               {
924                 matches[i] = false;
925                 if (dump_enabled_p ())
926                   {
927                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
928                                      "Build SLP failed: different operation "
929                                      "in stmt ");
930                     dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
931                                       stmts[i], 0);
932                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
933                                      "original stmt ");
934                     dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
935                                       first_stmt, 0);
936                   }
937               }
938           return false;
939         }
940       *two_operators = true;
941     }
942
943   return true;
944 }
945
946 /* Recursively build an SLP tree starting from NODE.
947    Fail (and return a value not equal to zero) if def-stmts are not
948    isomorphic, require data permutation or are of unsupported types of
949    operation.  Otherwise, return 0.
950    The value returned is the depth in the SLP tree where a mismatch
951    was found.  */
952
953 static bool
954 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
955                      slp_tree *node, unsigned int group_size,
956                      unsigned int *max_nunits,
957                      vec<slp_tree> *loads,
958                      unsigned int vectorization_factor,
959                      bool *matches, unsigned *npermutes, unsigned *tree_size,
960                      unsigned max_tree_size)
961 {
962   unsigned nops, i, this_tree_size = 0;
963   gimple stmt;
964
965   matches[0] = false;
966
967   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
968   if (is_gimple_call (stmt))
969     nops = gimple_call_num_args (stmt);
970   else if (is_gimple_assign (stmt))
971     {
972       nops = gimple_num_ops (stmt) - 1;
973       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
974         nops++;
975     }
976   else
977     return false;
978
979   bool two_operators = false;
980   if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
981                               SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
982                               max_nunits, vectorization_factor, matches,
983                               &two_operators))
984     return false;
985   SLP_TREE_TWO_OPERATORS (*node) = two_operators;
986
987   /* If the SLP node is a load, terminate the recursion.  */
988   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
989       && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
990     {
991       loads->safe_push (*node);
992       return true;
993     }
994
995   /* Get at the operands, verifying they are compatible.  */
996   vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
997   slp_oprnd_info oprnd_info;
998   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
999     {
1000       switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
1001                                            stmt, i, &oprnds_info))
1002         {
1003         case 0:
1004           break;
1005         case -1:
1006           matches[0] = false;
1007           vect_free_oprnd_info (oprnds_info);
1008           return false;
1009         case 1:
1010           matches[i] = false;
1011           break;
1012         }
1013     }
1014   for (i = 0; i < group_size; ++i)
1015     if (!matches[i])
1016       {
1017         vect_free_oprnd_info (oprnds_info);
1018         return false;
1019       }
1020
1021   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
1022
1023   /* Create SLP_TREE nodes for the definition node/s.  */
1024   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1025     {
1026       slp_tree child;
1027       unsigned old_nloads = loads->length ();
1028       unsigned old_max_nunits = *max_nunits;
1029
1030       if (oprnd_info->first_dt != vect_internal_def)
1031         continue;
1032
1033       if (++this_tree_size > max_tree_size)
1034         {
1035           vect_free_oprnd_info (oprnds_info);
1036           return false;
1037         }
1038
1039       child = vect_create_new_slp_node (oprnd_info->def_stmts);
1040       if (!child)
1041         {
1042           vect_free_oprnd_info (oprnds_info);
1043           return false;
1044         }
1045
1046       if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1047                                group_size, max_nunits, loads,
1048                                vectorization_factor, matches,
1049                                npermutes, &this_tree_size, max_tree_size))
1050         {
1051           /* If we have all children of child built up from scalars then just
1052              throw that away and build it up this node from scalars.  */
1053           if (!SLP_TREE_CHILDREN (child).is_empty ())
1054             {
1055               unsigned int j;
1056               slp_tree grandchild;
1057
1058               FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1059                 if (grandchild != NULL)
1060                   break;
1061               if (!grandchild)
1062                 {
1063                   /* Roll back.  */
1064                   *max_nunits = old_max_nunits;
1065                   loads->truncate (old_nloads);
1066                   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1067                       vect_free_slp_tree (grandchild);
1068                   SLP_TREE_CHILDREN (child).truncate (0);
1069
1070                   dump_printf_loc (MSG_NOTE, vect_location,
1071                                    "Building parent vector operands from "
1072                                    "scalars instead\n");
1073                   oprnd_info->def_stmts = vNULL;
1074                   vect_free_slp_tree (child);
1075                   SLP_TREE_CHILDREN (*node).quick_push (NULL);
1076                   continue;
1077                 }
1078             }
1079
1080           oprnd_info->def_stmts = vNULL;
1081           SLP_TREE_CHILDREN (*node).quick_push (child);
1082           continue;
1083         }
1084
1085       /* If the SLP build failed fatally and we analyze a basic-block
1086          simply treat nodes we fail to build as externally defined
1087          (and thus build vectors from the scalar defs).
1088          The cost model will reject outright expensive cases.
1089          ???  This doesn't treat cases where permutation ultimatively
1090          fails (or we don't try permutation below).  Ideally we'd
1091          even compute a permutation that will end up with the maximum
1092          SLP tree size...  */
1093       if (bb_vinfo
1094           && !matches[0]
1095           /* ???  Rejecting patterns this way doesn't work.  We'd have to
1096              do extra work to cancel the pattern so the uses see the
1097              scalar version.  */
1098           && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1099         {
1100           unsigned int j;
1101           slp_tree grandchild;
1102
1103           /* Roll back.  */
1104           *max_nunits = old_max_nunits;
1105           loads->truncate (old_nloads);
1106           FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1107             vect_free_slp_tree (grandchild);
1108           SLP_TREE_CHILDREN (child).truncate (0);
1109
1110           dump_printf_loc (MSG_NOTE, vect_location,
1111                            "Building vector operands from scalars\n");
1112           oprnd_info->def_stmts = vNULL;
1113           vect_free_slp_tree (child);
1114           SLP_TREE_CHILDREN (*node).quick_push (NULL);
1115           continue;
1116         }
1117
1118       /* If the SLP build for operand zero failed and operand zero
1119          and one can be commutated try that for the scalar stmts
1120          that failed the match.  */
1121       if (i == 0
1122           /* A first scalar stmt mismatch signals a fatal mismatch.  */
1123           && matches[0]
1124           /* ???  For COND_EXPRs we can swap the comparison operands
1125              as well as the arms under some constraints.  */
1126           && nops == 2
1127           && oprnds_info[1]->first_dt == vect_internal_def
1128           && is_gimple_assign (stmt)
1129           && commutative_tree_code (gimple_assign_rhs_code (stmt))
1130           && !SLP_TREE_TWO_OPERATORS (*node)
1131           /* Do so only if the number of not successful permutes was nor more
1132              than a cut-ff as re-trying the recursive match on
1133              possibly each level of the tree would expose exponential
1134              behavior.  */
1135           && *npermutes < 4)
1136         {
1137           unsigned int j;
1138           slp_tree grandchild;
1139
1140           /* Roll back.  */
1141           *max_nunits = old_max_nunits;
1142           loads->truncate (old_nloads);
1143           FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1144             vect_free_slp_tree (grandchild);
1145           SLP_TREE_CHILDREN (child).truncate (0);
1146
1147           /* Swap mismatched definition stmts.  */
1148           dump_printf_loc (MSG_NOTE, vect_location,
1149                            "Re-trying with swapped operands of stmts ");
1150           for (j = 0; j < group_size; ++j)
1151             if (!matches[j])
1152               {
1153                 gimple tem = oprnds_info[0]->def_stmts[j];
1154                 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
1155                 oprnds_info[1]->def_stmts[j] = tem;
1156                 dump_printf (MSG_NOTE, "%d ", j);
1157               }
1158           dump_printf (MSG_NOTE, "\n");
1159           /* And try again with scratch 'matches' ... */
1160           bool *tem = XALLOCAVEC (bool, group_size);
1161           if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1162                                    group_size, max_nunits, loads,
1163                                    vectorization_factor,
1164                                    tem, npermutes, &this_tree_size,
1165                                    max_tree_size))
1166             {
1167               /* ... so if successful we can apply the operand swapping
1168                  to the GIMPLE IL.  This is necessary because for example
1169                  vect_get_slp_defs uses operand indexes and thus expects
1170                  canonical operand order.  */
1171               for (j = 0; j < group_size; ++j)
1172                 if (!matches[j])
1173                   {
1174                     gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
1175                     swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1176                                        gimple_assign_rhs2_ptr (stmt));
1177                   }
1178               oprnd_info->def_stmts = vNULL;
1179               SLP_TREE_CHILDREN (*node).quick_push (child);
1180               continue;
1181             }
1182
1183           ++*npermutes;
1184         }
1185
1186       oprnd_info->def_stmts = vNULL;
1187       vect_free_slp_tree (child);
1188       vect_free_oprnd_info (oprnds_info);
1189       return false;
1190     }
1191
1192   if (tree_size)
1193     *tree_size += this_tree_size;
1194
1195   vect_free_oprnd_info (oprnds_info);
1196   return true;
1197 }
1198
1199 /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
1200
1201 static void
1202 vect_print_slp_tree (int dump_kind, slp_tree node)
1203 {
1204   int i;
1205   gimple stmt;
1206   slp_tree child;
1207
1208   if (!node)
1209     return;
1210
1211   dump_printf (dump_kind, "node ");
1212   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1213     {
1214       dump_printf (dump_kind, "\n\tstmt %d ", i);
1215       dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1216     }
1217   dump_printf (dump_kind, "\n");
1218
1219   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1220     vect_print_slp_tree (dump_kind, child);
1221 }
1222
1223
1224 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1225    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1226    J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1227    stmts in NODE are to be marked.  */
1228
1229 static void
1230 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1231 {
1232   int i;
1233   gimple stmt;
1234   slp_tree child;
1235
1236   if (!node)
1237     return;
1238
1239   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1240     if (j < 0 || i == j)
1241       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1242
1243   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1244     vect_mark_slp_stmts (child, mark, j);
1245 }
1246
1247
1248 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
1249
1250 static void
1251 vect_mark_slp_stmts_relevant (slp_tree node)
1252 {
1253   int i;
1254   gimple stmt;
1255   stmt_vec_info stmt_info;
1256   slp_tree child;
1257
1258   if (!node)
1259     return;
1260
1261   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1262     {
1263       stmt_info = vinfo_for_stmt (stmt);
1264       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1265                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1266       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1267     }
1268
1269   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1270     vect_mark_slp_stmts_relevant (child);
1271 }
1272
1273
1274 /* Rearrange the statements of NODE according to PERMUTATION.  */
1275
1276 static void
1277 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1278                           vec<unsigned> permutation)
1279 {
1280   gimple stmt;
1281   vec<gimple> tmp_stmts;
1282   unsigned int i;
1283   slp_tree child;
1284
1285   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1286     vect_slp_rearrange_stmts (child, group_size, permutation);
1287
1288   gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1289   tmp_stmts.create (group_size);
1290   tmp_stmts.quick_grow_cleared (group_size);
1291
1292   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1293     tmp_stmts[permutation[i]] = stmt;
1294
1295   SLP_TREE_SCALAR_STMTS (node).release ();
1296   SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1297 }
1298
1299
1300 /* Attempt to reorder stmts in a reduction chain so that we don't
1301    require any load permutation.  Return true if that was possible,
1302    otherwise return false.  */
1303
1304 static bool
1305 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1306 {
1307   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1308   unsigned int i, j;
1309   sbitmap load_index;
1310   unsigned int lidx;
1311   slp_tree node, load;
1312
1313   /* Compare all the permutation sequences to the first one.  We know
1314      that at least one load is permuted.  */
1315   node = SLP_INSTANCE_LOADS (slp_instn)[0];
1316   if (!node->load_permutation.exists ())
1317     return false;
1318   for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1319     {
1320       if (!load->load_permutation.exists ())
1321         return false;
1322       FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1323         if (lidx != node->load_permutation[j])
1324           return false;
1325     }
1326
1327   /* Check that the loads in the first sequence are different and there
1328      are no gaps between them.  */
1329   load_index = sbitmap_alloc (group_size);
1330   bitmap_clear (load_index);
1331   FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1332     {
1333       if (bitmap_bit_p (load_index, lidx))
1334         {
1335           sbitmap_free (load_index);
1336           return false;
1337         }
1338       bitmap_set_bit (load_index, lidx);
1339     }
1340   for (i = 0; i < group_size; i++)
1341     if (!bitmap_bit_p (load_index, i))
1342       {
1343         sbitmap_free (load_index);
1344         return false;
1345       }
1346   sbitmap_free (load_index);
1347
1348   /* This permutation is valid for reduction.  Since the order of the
1349      statements in the nodes is not important unless they are memory
1350      accesses, we can rearrange the statements in all the nodes
1351      according to the order of the loads.  */
1352   vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1353                             node->load_permutation);
1354
1355   /* We are done, no actual permutations need to be generated.  */
1356   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1357     SLP_TREE_LOAD_PERMUTATION (node).release ();
1358   return true;
1359 }
1360
1361 /* Check if the required load permutations in the SLP instance
1362    SLP_INSTN are supported.  */
1363
1364 static bool
1365 vect_supported_load_permutation_p (slp_instance slp_instn)
1366 {
1367   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1368   unsigned int i, j, k, next;
1369   slp_tree node;
1370   gimple stmt, load, next_load, first_load;
1371   struct data_reference *dr;
1372
1373   if (dump_enabled_p ())
1374     {
1375       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1376       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1377         if (node->load_permutation.exists ())
1378           FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1379             dump_printf (MSG_NOTE, "%d ", next);
1380         else
1381           for (k = 0; k < group_size; ++k)
1382             dump_printf (MSG_NOTE, "%d ", k);
1383       dump_printf (MSG_NOTE, "\n");
1384     }
1385
1386   /* In case of reduction every load permutation is allowed, since the order
1387      of the reduction statements is not important (as opposed to the case of
1388      grouped stores).  The only condition we need to check is that all the
1389      load nodes are of the same size and have the same permutation (and then
1390      rearrange all the nodes of the SLP instance according to this 
1391      permutation).  */
1392
1393   /* Check that all the load nodes are of the same size.  */
1394   /* ???  Can't we assert this? */
1395   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1396     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1397       return false;
1398
1399   node = SLP_INSTANCE_TREE (slp_instn);
1400   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1401
1402   /* Reduction (there are no data-refs in the root).
1403      In reduction chain the order of the loads is not important.  */
1404   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1405       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1406     {
1407       if (vect_attempt_slp_rearrange_stmts (slp_instn))
1408         return true;
1409
1410       /* Fallthru to general load permutation handling.  */
1411     }
1412
1413   /* In basic block vectorization we allow any subchain of an interleaving
1414      chain.
1415      FORNOW: not supported in loop SLP because of realignment compications.  */
1416   if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1417     {
1418       /* Check whether the loads in an instance form a subchain and thus
1419          no permutation is necessary.  */
1420       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1421         {
1422           if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1423             continue;
1424           bool subchain_p = true;
1425           next_load = NULL;
1426           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1427             {
1428               if (j != 0
1429                   && (next_load != load
1430                       || GROUP_GAP (vinfo_for_stmt (load)) != 1))
1431                 {
1432                   subchain_p = false;
1433                   break;
1434                 }
1435               next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1436             }
1437           if (subchain_p)
1438             SLP_TREE_LOAD_PERMUTATION (node).release ();
1439           else
1440             {
1441               /* Verify the permutation can be generated.  */
1442               vec<tree> tem;
1443               if (!vect_transform_slp_perm_load (node, tem, NULL,
1444                                                  1, slp_instn, true))
1445                 {
1446                   dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1447                                    vect_location,
1448                                    "unsupported load permutation\n");
1449                   return false;
1450                 }
1451             }
1452         }
1453
1454       /* Check that the alignment of the first load in every subchain, i.e.,
1455          the first statement in every load node, is supported.
1456          ???  This belongs in alignment checking.  */
1457       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1458         {
1459           first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1460           if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1461             {
1462               dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1463               if (vect_supportable_dr_alignment (dr, false)
1464                   == dr_unaligned_unsupported)
1465                 {
1466                   if (dump_enabled_p ())
1467                     {
1468                       dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1469                                        vect_location,
1470                                        "unsupported unaligned load ");
1471                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1472                                         first_load, 0);
1473                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1474                     }
1475                   return false;
1476                 }
1477             }
1478         }
1479
1480       return true;
1481     }
1482
1483   /* For loop vectorization verify we can generate the permutation.  */
1484   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1485     if (node->load_permutation.exists ()
1486         && !vect_transform_slp_perm_load
1487               (node, vNULL, NULL,
1488                SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1489       return false;
1490
1491   return true;
1492 }
1493
1494
1495 /* Find the last store in SLP INSTANCE.  */
1496
1497 static gimple
1498 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1499 {
1500   gimple last = NULL, stmt;
1501
1502   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1503     {
1504       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1505       if (is_pattern_stmt_p (stmt_vinfo))
1506         last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1507       else
1508         last = get_later_stmt (stmt, last);
1509     }
1510
1511   return last;
1512 }
1513
1514 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1515
1516 static void
1517 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1518                          stmt_vector_for_cost *prologue_cost_vec,
1519                          stmt_vector_for_cost *body_cost_vec,
1520                          unsigned ncopies_for_cost)
1521 {
1522   unsigned i;
1523   slp_tree child;
1524   gimple stmt, s;
1525   stmt_vec_info stmt_info;
1526   tree lhs;
1527   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1528
1529   /* Recurse down the SLP tree.  */
1530   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1531     if (child)
1532       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1533                                body_cost_vec, ncopies_for_cost);
1534
1535   /* Look at the first scalar stmt to determine the cost.  */
1536   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1537   stmt_info = vinfo_for_stmt (stmt);
1538   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1539     {
1540       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1541         vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1542                                vect_uninitialized_def,
1543                                node, prologue_cost_vec, body_cost_vec);
1544       else
1545         {
1546           int i;
1547           gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1548           vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1549                                 node, prologue_cost_vec, body_cost_vec);
1550           /* If the load is permuted record the cost for the permutation.
1551              ???  Loads from multiple chains are let through here only
1552              for a single special case involving complex numbers where
1553              in the end no permutation is necessary.  */
1554           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1555             if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1556                  == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1557                 && vect_get_place_in_interleaving_chain
1558                      (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1559               {
1560                 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1561                                   stmt_info, 0, vect_body);
1562                 break;
1563               }
1564         }
1565     }
1566   else
1567     {
1568       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1569                         stmt_info, 0, vect_body);
1570       if (SLP_TREE_TWO_OPERATORS (node))
1571         {
1572           record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1573                             stmt_info, 0, vect_body);
1574           record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1575                             stmt_info, 0, vect_body);
1576         }
1577     }
1578
1579   /* Scan operands and account for prologue cost of constants/externals.
1580      ???  This over-estimates cost for multiple uses and should be
1581      re-engineered.  */
1582   lhs = gimple_get_lhs (stmt);
1583   for (i = 0; i < gimple_num_ops (stmt); ++i)
1584     {
1585       tree def, op = gimple_op (stmt, i);
1586       gimple def_stmt;
1587       enum vect_def_type dt;
1588       if (!op || op == lhs)
1589         continue;
1590       if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
1591                               STMT_VINFO_BB_VINFO (stmt_info),
1592                               &def_stmt, &def, &dt))
1593         {
1594           /* Without looking at the actual initializer a vector of
1595              constants can be implemented as load from the constant pool.
1596              ???  We need to pass down stmt_info for a vector type
1597              even if it points to the wrong stmt.  */
1598           if (dt == vect_constant_def)
1599             record_stmt_cost (prologue_cost_vec, 1, vector_load,
1600                               stmt_info, 0, vect_prologue);
1601           else if (dt == vect_external_def)
1602             record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1603                               stmt_info, 0, vect_prologue);
1604         }
1605     }
1606 }
1607
1608 /* Compute the cost for the SLP instance INSTANCE.  */
1609
1610 static void
1611 vect_analyze_slp_cost (slp_instance instance, void *data)
1612 {
1613   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1614   unsigned ncopies_for_cost;
1615   stmt_info_for_cost *si;
1616   unsigned i;
1617
1618   /* Calculate the number of vector stmts to create based on the unrolling
1619      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1620      GROUP_SIZE / NUNITS otherwise.  */
1621   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1622   slp_tree node = SLP_INSTANCE_TREE (instance);
1623   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1624   /* Adjust the group_size by the vectorization factor which is always one
1625      for basic-block vectorization.  */
1626   if (STMT_VINFO_LOOP_VINFO (stmt_info))
1627     group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1628   unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1629   /* For reductions look at a reduction operand in case the reduction
1630      operation is widening like DOT_PROD or SAD.  */
1631   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1632     {
1633       gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1634       switch (gimple_assign_rhs_code (stmt))
1635         {
1636         case DOT_PROD_EXPR:
1637         case SAD_EXPR:
1638           nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1639                                 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1640           break;
1641         default:;
1642         }
1643     }
1644   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1645
1646   prologue_cost_vec.create (10);
1647   body_cost_vec.create (10);
1648   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1649                            &prologue_cost_vec, &body_cost_vec,
1650                            ncopies_for_cost);
1651
1652   /* Record the prologue costs, which were delayed until we were
1653      sure that SLP was successful.  */
1654   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1655     {
1656       struct _stmt_vec_info *stmt_info
1657         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1658       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1659                             si->misalign, vect_prologue);
1660     }
1661
1662   /* Record the instance's instructions in the target cost model.  */
1663   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1664     {
1665       struct _stmt_vec_info *stmt_info
1666         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1667       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1668                             si->misalign, vect_body);
1669     }
1670
1671   prologue_cost_vec.release ();
1672   body_cost_vec.release ();
1673 }
1674
1675 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1676    vect_build_slp_tree to build a tree of packed stmts if possible.
1677    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1678
1679 static bool
1680 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1681                            gimple stmt, unsigned max_tree_size)
1682 {
1683   slp_instance new_instance;
1684   slp_tree node;
1685   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1686   unsigned int unrolling_factor = 1, nunits;
1687   tree vectype, scalar_type = NULL_TREE;
1688   gimple next;
1689   unsigned int vectorization_factor = 0;
1690   int i;
1691   unsigned int max_nunits = 0;
1692   vec<slp_tree> loads;
1693   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1694   vec<gimple> scalar_stmts;
1695
1696   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1697     {
1698       if (dr)
1699         {
1700           scalar_type = TREE_TYPE (DR_REF (dr));
1701           vectype = get_vectype_for_scalar_type (scalar_type);
1702         }
1703       else
1704         {
1705           gcc_assert (loop_vinfo);
1706           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1707         }
1708
1709       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1710     }
1711   else
1712     {
1713       gcc_assert (loop_vinfo);
1714       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1715       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1716     }
1717
1718   if (!vectype)
1719     {
1720       if (dump_enabled_p ())
1721         {
1722           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1723                            "Build SLP failed: unsupported data-type ");
1724           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1725           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1726         }
1727
1728       return false;
1729     }
1730
1731   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1732   if (loop_vinfo)
1733     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1734   else
1735     vectorization_factor = nunits;
1736
1737   /* Calculate the unrolling factor.  */
1738   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1739   if (unrolling_factor != 1 && !loop_vinfo)
1740     {
1741       if (dump_enabled_p ())
1742         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1743                          "Build SLP failed: unrolling required in basic"
1744                          " block SLP\n");
1745
1746       return false;
1747     }
1748
1749   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1750   scalar_stmts.create (group_size);
1751   next = stmt;
1752   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1753     {
1754       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1755       while (next)
1756         {
1757           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1758               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1759             scalar_stmts.safe_push (
1760                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1761           else
1762             scalar_stmts.safe_push (next);
1763           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1764         }
1765       /* Mark the first element of the reduction chain as reduction to properly
1766          transform the node.  In the reduction analysis phase only the last
1767          element of the chain is marked as reduction.  */
1768       if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1769         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1770     }
1771   else
1772     {
1773       /* Collect reduction statements.  */
1774       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1775       for (i = 0; reductions.iterate (i, &next); i++)
1776         scalar_stmts.safe_push (next);
1777     }
1778
1779   node = vect_create_new_slp_node (scalar_stmts);
1780
1781   loads.create (group_size);
1782
1783   /* Build the tree for the SLP instance.  */
1784   bool *matches = XALLOCAVEC (bool, group_size);
1785   unsigned npermutes = 0;
1786   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1787                            &max_nunits, &loads,
1788                            vectorization_factor, matches, &npermutes, NULL,
1789                            max_tree_size))
1790     {
1791       /* Calculate the unrolling factor based on the smallest type.  */
1792       if (max_nunits > nunits)
1793         unrolling_factor = least_common_multiple (max_nunits, group_size)
1794                            / group_size;
1795
1796       if (unrolling_factor != 1 && !loop_vinfo)
1797         {
1798           if (dump_enabled_p ())
1799             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1800                              "Build SLP failed: unrolling required in basic"
1801                              " block SLP\n");
1802           vect_free_slp_tree (node);
1803           loads.release ();
1804           return false;
1805         }
1806
1807       /* Create a new SLP instance.  */
1808       new_instance = XNEW (struct _slp_instance);
1809       SLP_INSTANCE_TREE (new_instance) = node;
1810       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1811       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1812       SLP_INSTANCE_LOADS (new_instance) = loads;
1813
1814       /* Compute the load permutation.  */
1815       slp_tree load_node;
1816       bool loads_permuted = false;
1817       FOR_EACH_VEC_ELT (loads, i, load_node)
1818         {
1819           vec<unsigned> load_permutation;
1820           int j;
1821           gimple load, first_stmt;
1822           bool this_load_permuted = false;
1823           load_permutation.create (group_size);
1824           first_stmt = GROUP_FIRST_ELEMENT
1825               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1826           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1827             {
1828               int load_place
1829                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1830               gcc_assert (load_place != -1);
1831               if (load_place != j)
1832                 this_load_permuted = true;
1833               load_permutation.safe_push (load_place);
1834             }
1835           if (!this_load_permuted
1836               /* The load requires permutation when unrolling exposes
1837                  a gap either because the group is larger than the SLP
1838                  group-size or because there is a gap between the groups.  */
1839               && (unrolling_factor == 1
1840                   || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1841                       && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
1842             {
1843               load_permutation.release ();
1844               continue;
1845             }
1846           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1847           loads_permuted = true;
1848         }
1849
1850       if (loads_permuted)
1851         {
1852           if (!vect_supported_load_permutation_p (new_instance))
1853             {
1854               if (dump_enabled_p ())
1855                 {
1856                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1857                                    "Build SLP failed: unsupported load "
1858                                    "permutation ");
1859                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1860                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1861                 }
1862               vect_free_slp_instance (new_instance);
1863               return false;
1864             }
1865         }
1866
1867
1868       if (loop_vinfo)
1869         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1870       else
1871         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1872
1873       if (dump_enabled_p ())
1874         vect_print_slp_tree (MSG_NOTE, node);
1875
1876       return true;
1877     }
1878
1879   /* Failed to SLP.  */
1880   /* Free the allocated memory.  */
1881   vect_free_slp_tree (node);
1882   loads.release ();
1883
1884   return false;
1885 }
1886
1887
1888 /* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
1889    trees of packed scalar stmts if SLP is possible.  */
1890
1891 bool
1892 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1893                   unsigned max_tree_size)
1894 {
1895   unsigned int i;
1896   vec<gimple> grouped_stores;
1897   vec<gimple> reductions = vNULL;
1898   vec<gimple> reduc_chains = vNULL;
1899   gimple first_element;
1900   bool ok = false;
1901
1902   if (dump_enabled_p ())
1903     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1904
1905   if (loop_vinfo)
1906     {
1907       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1908       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1909       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1910     }
1911   else
1912     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1913
1914   /* Find SLP sequences starting from groups of grouped stores.  */
1915   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1916     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1917                                    max_tree_size))
1918       ok = true;
1919
1920   if (reduc_chains.length () > 0)
1921     {
1922       /* Find SLP sequences starting from reduction chains.  */
1923       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1924         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1925                                        max_tree_size))
1926           ok = true;
1927         else
1928           return false;
1929
1930       /* Don't try to vectorize SLP reductions if reduction chain was
1931          detected.  */
1932       return ok;
1933     }
1934
1935   /* Find SLP sequences starting from groups of reductions.  */
1936   if (reductions.length () > 1
1937       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1938                                     max_tree_size))
1939     ok = true;
1940
1941   return true;
1942 }
1943
1944
1945 /* For each possible SLP instance decide whether to SLP it and calculate overall
1946    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
1947    least one instance.  */
1948
1949 bool
1950 vect_make_slp_decision (loop_vec_info loop_vinfo)
1951 {
1952   unsigned int i, unrolling_factor = 1;
1953   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1954   slp_instance instance;
1955   int decided_to_slp = 0;
1956
1957   if (dump_enabled_p ())
1958     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1959                      "\n");
1960
1961   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1962     {
1963       /* FORNOW: SLP if you can.  */
1964       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1965         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1966
1967       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1968          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1969          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1970       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1971       decided_to_slp++;
1972     }
1973
1974   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1975
1976   if (decided_to_slp && dump_enabled_p ())
1977     dump_printf_loc (MSG_NOTE, vect_location,
1978                      "Decided to SLP %d instances. Unrolling factor %d\n",
1979                      decided_to_slp, unrolling_factor);
1980
1981   return (decided_to_slp > 0);
1982 }
1983
1984
1985 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1986    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
1987
1988 static void
1989 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
1990 {
1991   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
1992   imm_use_iterator imm_iter;
1993   gimple use_stmt;
1994   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
1995   slp_tree child;
1996   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1997   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1998   int j;
1999
2000   /* Propagate hybrid down the SLP tree.  */
2001   if (stype == hybrid)
2002     ;
2003   else if (HYBRID_SLP_STMT (stmt_vinfo))
2004     stype = hybrid;
2005   else
2006     {
2007       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
2008       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2009       /* We always get the pattern stmt here, but for immediate
2010          uses we have to use the LHS of the original stmt.  */
2011       gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2012       if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
2013         stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2014       if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2015         FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
2016           {
2017             if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2018               continue;
2019             use_vinfo = vinfo_for_stmt (use_stmt);
2020             if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2021                 && STMT_VINFO_RELATED_STMT (use_vinfo))
2022               use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2023             if (!STMT_SLP_TYPE (use_vinfo)
2024                 && (STMT_VINFO_RELEVANT (use_vinfo)
2025                     || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2026                 && !(gimple_code (use_stmt) == GIMPLE_PHI
2027                      && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2028               stype = hybrid;
2029           }
2030     }
2031
2032   if (stype == hybrid)
2033     {
2034       if (dump_enabled_p ())
2035         {
2036           dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2037           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2038         }
2039       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2040     }
2041
2042   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2043     if (child)
2044       vect_detect_hybrid_slp_stmts (child, i, stype);
2045 }
2046
2047 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2048
2049 static tree
2050 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2051 {
2052   walk_stmt_info *wi = (walk_stmt_info *)data;
2053   struct loop *loopp = (struct loop *)wi->info;
2054
2055   if (wi->is_lhs)
2056     return NULL_TREE;
2057
2058   if (TREE_CODE (*tp) == SSA_NAME
2059       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2060     {
2061       gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
2062       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2063           && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2064         {
2065           if (dump_enabled_p ())
2066             {
2067               dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2068               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2069             }
2070           STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2071         }
2072     }
2073
2074   return NULL_TREE;
2075 }
2076
2077 static tree
2078 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2079                           walk_stmt_info *)
2080 {
2081   /* If the stmt is in a SLP instance then this isn't a reason
2082      to mark use definitions in other SLP instances as hybrid.  */
2083   if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2084     *handled = true;
2085   return NULL_TREE;
2086 }
2087
2088 /* Find stmts that must be both vectorized and SLPed.  */
2089
2090 void
2091 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2092 {
2093   unsigned int i;
2094   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2095   slp_instance instance;
2096
2097   if (dump_enabled_p ())
2098     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2099                      "\n");
2100
2101   /* First walk all pattern stmt in the loop and mark defs of uses as
2102      hybrid because immediate uses in them are not recorded.  */
2103   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2104     {
2105       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2106       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2107            gsi_next (&gsi))
2108         {
2109           gimple stmt = gsi_stmt (gsi);
2110           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2111           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2112             {
2113               walk_stmt_info wi;
2114               memset (&wi, 0, sizeof (wi));
2115               wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2116               gimple_stmt_iterator gsi2
2117                 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2118               walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2119                                 vect_detect_hybrid_slp_1, &wi);
2120               walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2121                                vect_detect_hybrid_slp_2,
2122                                vect_detect_hybrid_slp_1, &wi);
2123             }
2124         }
2125     }
2126
2127   /* Then walk the SLP instance trees marking stmts with uses in
2128      non-SLP stmts as hybrid, also propagating hybrid down the
2129      SLP tree, collecting the above info on-the-fly.  */
2130   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2131     {
2132       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2133         vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2134                                       i, pure_slp);
2135     }
2136 }
2137
2138
2139 /* Create and initialize a new bb_vec_info struct for BB, as well as
2140    stmt_vec_info structs for all the stmts in it.  */
2141
2142 static bb_vec_info
2143 new_bb_vec_info (basic_block bb)
2144 {
2145   bb_vec_info res = NULL;
2146   gimple_stmt_iterator gsi;
2147
2148   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2149   BB_VINFO_BB (res) = bb;
2150
2151   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2152     {
2153       gimple stmt = gsi_stmt (gsi);
2154       gimple_set_uid (stmt, 0);
2155       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
2156     }
2157
2158   BB_VINFO_GROUPED_STORES (res).create (10);
2159   BB_VINFO_SLP_INSTANCES (res).create (2);
2160   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2161
2162   bb->aux = res;
2163   return res;
2164 }
2165
2166
2167 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2168    stmts in the basic block.  */
2169
2170 static void
2171 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2172 {
2173   vec<slp_instance> slp_instances;
2174   slp_instance instance;
2175   basic_block bb;
2176   gimple_stmt_iterator si;
2177   unsigned i;
2178
2179   if (!bb_vinfo)
2180     return;
2181
2182   bb = BB_VINFO_BB (bb_vinfo);
2183
2184   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2185     {
2186       gimple stmt = gsi_stmt (si);
2187       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2188
2189       if (stmt_info)
2190         /* Free stmt_vec_info.  */
2191         free_stmt_vec_info (stmt);
2192     }
2193
2194   vect_destroy_datarefs (NULL, bb_vinfo);
2195   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2196   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2197   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2198   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2199     vect_free_slp_instance (instance);
2200   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2201   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2202   free (bb_vinfo);
2203   bb->aux = NULL;
2204 }
2205
2206
2207 /* Analyze statements contained in SLP tree node after recursively analyzing
2208    the subtree. Return TRUE if the operations are supported.  */
2209
2210 static bool
2211 vect_slp_analyze_node_operations (slp_tree node)
2212 {
2213   bool dummy;
2214   int i;
2215   gimple stmt;
2216   slp_tree child;
2217
2218   if (!node)
2219     return true;
2220
2221   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2222     if (!vect_slp_analyze_node_operations (child))
2223       return false;
2224
2225   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2226     {
2227       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2228       gcc_assert (stmt_info);
2229       gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2230
2231       if (!vect_analyze_stmt (stmt, &dummy, node))
2232         return false;
2233     }
2234
2235   return true;
2236 }
2237
2238
2239 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
2240    operations are supported. */
2241
2242 bool
2243 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2244 {
2245   slp_instance instance;
2246   int i;
2247
2248   if (dump_enabled_p ())
2249     dump_printf_loc (MSG_NOTE, vect_location,
2250                      "=== vect_slp_analyze_operations ===\n");
2251
2252   for (i = 0; slp_instances.iterate (i, &instance); )
2253     {
2254       if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2255         {
2256           dump_printf_loc (MSG_NOTE, vect_location,
2257                            "removing SLP instance operations starting from: ");
2258           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2259                             SLP_TREE_SCALAR_STMTS
2260                               (SLP_INSTANCE_TREE (instance))[0], 0);
2261           vect_free_slp_instance (instance);
2262           slp_instances.ordered_remove (i);
2263         }
2264       else
2265         {
2266           /* Compute the costs of the SLP instance.  */
2267           vect_analyze_slp_cost (instance, data);
2268           i++;
2269         }
2270     }
2271
2272   if (!slp_instances.length ())
2273     return false;
2274
2275   return true;
2276 }
2277
2278
2279 /* Compute the scalar cost of the SLP node NODE and its children
2280    and return it.  Do not account defs that are marked in LIFE and
2281    update LIFE according to uses of NODE.  */
2282
2283 static unsigned
2284 vect_bb_slp_scalar_cost (basic_block bb,
2285                          slp_tree node, vec<bool, va_heap> *life)
2286 {
2287   unsigned scalar_cost = 0;
2288   unsigned i;
2289   gimple stmt;
2290   slp_tree child;
2291
2292   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2293     {
2294       unsigned stmt_cost;
2295       ssa_op_iter op_iter;
2296       def_operand_p def_p;
2297       stmt_vec_info stmt_info;
2298
2299       if ((*life)[i])
2300         continue;
2301
2302       /* If there is a non-vectorized use of the defs then the scalar
2303          stmt is kept live in which case we do not account it or any
2304          required defs in the SLP children in the scalar cost.  This
2305          way we make the vectorization more costly when compared to
2306          the scalar cost.  */
2307       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2308         {
2309           imm_use_iterator use_iter;
2310           gimple use_stmt;
2311           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2312             if (!is_gimple_debug (use_stmt)
2313                 && (gimple_code (use_stmt) == GIMPLE_PHI
2314                     || gimple_bb (use_stmt) != bb
2315                     || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2316               {
2317                 (*life)[i] = true;
2318                 BREAK_FROM_IMM_USE_STMT (use_iter);
2319               }
2320         }
2321       if ((*life)[i])
2322         continue;
2323
2324       stmt_info = vinfo_for_stmt (stmt);
2325       if (STMT_VINFO_DATA_REF (stmt_info))
2326         {
2327           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2328             stmt_cost = vect_get_stmt_cost (scalar_load);
2329           else
2330             stmt_cost = vect_get_stmt_cost (scalar_store);
2331         }
2332       else
2333         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2334
2335       scalar_cost += stmt_cost;
2336     }
2337
2338   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2339     if (child)
2340       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2341
2342   return scalar_cost;
2343 }
2344
2345 /* Check if vectorization of the basic block is profitable.  */
2346
2347 static bool
2348 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2349 {
2350   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2351   slp_instance instance;
2352   int i;
2353   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2354   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2355
2356   /* Calculate scalar cost.  */
2357   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2358     {
2359       auto_vec<bool, 20> life;
2360       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2361       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2362                                               SLP_INSTANCE_TREE (instance),
2363                                               &life);
2364     }
2365
2366   /* Complete the target-specific cost calculation.  */
2367   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2368                &vec_inside_cost, &vec_epilogue_cost);
2369
2370   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2371
2372   if (dump_enabled_p ())
2373     {
2374       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2375       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2376                    vec_inside_cost);
2377       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2378       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2379       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2380     }
2381
2382   /* Vectorization is profitable if its cost is less than the cost of scalar
2383      version.  */
2384   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2385     return false;
2386
2387   return true;
2388 }
2389
2390 /* Check if the basic block can be vectorized.  */
2391
2392 static bb_vec_info
2393 vect_slp_analyze_bb_1 (basic_block bb)
2394 {
2395   bb_vec_info bb_vinfo;
2396   vec<slp_instance> slp_instances;
2397   slp_instance instance;
2398   int i;
2399   int min_vf = 2;
2400   unsigned n_stmts = 0;
2401
2402   bb_vinfo = new_bb_vec_info (bb);
2403   if (!bb_vinfo)
2404     return NULL;
2405
2406   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2407     {
2408       if (dump_enabled_p ())
2409         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2410                          "not vectorized: unhandled data-ref in basic "
2411                          "block.\n");
2412
2413       destroy_bb_vec_info (bb_vinfo);
2414       return NULL;
2415     }
2416
2417   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2418     {
2419       if (dump_enabled_p ())
2420         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421                          "not vectorized: not enough data-refs in "
2422                          "basic block.\n");
2423
2424       destroy_bb_vec_info (bb_vinfo);
2425       return NULL;
2426     }
2427
2428   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2429     {
2430      if (dump_enabled_p ())
2431        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432                         "not vectorized: unhandled data access in "
2433                         "basic block.\n");
2434
2435       destroy_bb_vec_info (bb_vinfo);
2436       return NULL;
2437     }
2438
2439   vect_pattern_recog (NULL, bb_vinfo);
2440
2441   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2442     {
2443       if (dump_enabled_p ())
2444         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2445                          "not vectorized: bad data alignment in basic "
2446                          "block.\n");
2447
2448       destroy_bb_vec_info (bb_vinfo);
2449       return NULL;
2450     }
2451
2452   /* Check the SLP opportunities in the basic block, analyze and build SLP
2453      trees.  */
2454   if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2455     {
2456       if (dump_enabled_p ())
2457         {
2458           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459                            "Failed to SLP the basic block.\n");
2460           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2461                            "not vectorized: failed to find SLP opportunities "
2462                            "in basic block.\n");
2463         }
2464
2465       destroy_bb_vec_info (bb_vinfo);
2466       return NULL;
2467     }
2468
2469   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2470
2471   /* Mark all the statements that we want to vectorize as pure SLP and
2472      relevant.  */
2473   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2474     {
2475       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2476       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2477     }
2478
2479   /* Mark all the statements that we do not want to vectorize.  */
2480   for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2481        !gsi_end_p (gsi); gsi_next (&gsi))
2482     {
2483       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2484       if (STMT_SLP_TYPE (vinfo) != pure_slp)
2485         STMT_VINFO_VECTORIZABLE (vinfo) = false;
2486     }
2487
2488   /* Analyze dependences.  At this point all stmts not participating in
2489      vectorization have to be marked.  Dependence analysis assumes
2490      that we either vectorize all SLP instances or none at all.  */
2491   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2492      {
2493        if (dump_enabled_p ())
2494          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2495                           "not vectorized: unhandled data dependence "
2496                           "in basic block.\n");
2497
2498        destroy_bb_vec_info (bb_vinfo);
2499        return NULL;
2500      }
2501
2502   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2503     {
2504       if (dump_enabled_p ())
2505         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2506                          "not vectorized: unsupported alignment in basic "
2507                          "block.\n");
2508       destroy_bb_vec_info (bb_vinfo);
2509       return NULL;
2510     }
2511
2512   if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2513                                     BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2514     {
2515       if (dump_enabled_p ())
2516         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2517                          "not vectorized: bad operation in basic block.\n");
2518
2519       destroy_bb_vec_info (bb_vinfo);
2520       return NULL;
2521     }
2522
2523   /* Cost model: check if the vectorization is worthwhile.  */
2524   if (!unlimited_cost_model (NULL)
2525       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2526     {
2527       if (dump_enabled_p ())
2528         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529                          "not vectorized: vectorization is not "
2530                          "profitable.\n");
2531
2532       destroy_bb_vec_info (bb_vinfo);
2533       return NULL;
2534     }
2535
2536   if (dump_enabled_p ())
2537     dump_printf_loc (MSG_NOTE, vect_location,
2538                      "Basic block will be vectorized using SLP\n");
2539
2540   return bb_vinfo;
2541 }
2542
2543
2544 bb_vec_info
2545 vect_slp_analyze_bb (basic_block bb)
2546 {
2547   bb_vec_info bb_vinfo;
2548   int insns = 0;
2549   gimple_stmt_iterator gsi;
2550   unsigned int vector_sizes;
2551
2552   if (dump_enabled_p ())
2553     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2554
2555   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2556     {
2557       gimple stmt = gsi_stmt (gsi);
2558       if (!is_gimple_debug (stmt)
2559           && !gimple_nop_p (stmt)
2560           && gimple_code (stmt) != GIMPLE_LABEL)
2561         insns++;
2562     }
2563
2564   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2565     {
2566       if (dump_enabled_p ())
2567         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2568                          "not vectorized: too many instructions in "
2569                          "basic block.\n");
2570
2571       return NULL;
2572     }
2573
2574   /* Autodetect first vector size we try.  */
2575   current_vector_size = 0;
2576   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2577
2578   while (1)
2579     {
2580       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2581       if (bb_vinfo)
2582         return bb_vinfo;
2583
2584       destroy_bb_vec_info (bb_vinfo);
2585
2586       vector_sizes &= ~current_vector_size;
2587       if (vector_sizes == 0
2588           || current_vector_size == 0)
2589         return NULL;
2590
2591       /* Try the next biggest vector size.  */
2592       current_vector_size = 1 << floor_log2 (vector_sizes);
2593       if (dump_enabled_p ())
2594         dump_printf_loc (MSG_NOTE, vect_location,
2595                          "***** Re-trying analysis with "
2596                          "vector size %d\n", current_vector_size);
2597     }
2598 }
2599
2600
2601 /* For constant and loop invariant defs of SLP_NODE this function returns
2602    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2603    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2604    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2605    REDUC_INDEX is the index of the reduction operand in the statements, unless
2606    it is -1.  */
2607
2608 static void
2609 vect_get_constant_vectors (tree op, slp_tree slp_node,
2610                            vec<tree> *vec_oprnds,
2611                            unsigned int op_num, unsigned int number_of_vectors,
2612                            int reduc_index)
2613 {
2614   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2615   gimple stmt = stmts[0];
2616   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2617   unsigned nunits;
2618   tree vec_cst;
2619   tree *elts;
2620   unsigned j, number_of_places_left_in_vector;
2621   tree vector_type;
2622   tree vop;
2623   int group_size = stmts.length ();
2624   unsigned int vec_num, i;
2625   unsigned number_of_copies = 1;
2626   vec<tree> voprnds;
2627   voprnds.create (number_of_vectors);
2628   bool constant_p, is_store;
2629   tree neutral_op = NULL;
2630   enum tree_code code = gimple_expr_code (stmt);
2631   gimple def_stmt;
2632   struct loop *loop;
2633   gimple_seq ctor_seq = NULL;
2634
2635   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2636   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2637
2638   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2639       && reduc_index != -1)
2640     {
2641       op_num = reduc_index;
2642       op = gimple_op (stmt, op_num + 1);
2643       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2644          we need either neutral operands or the original operands.  See
2645          get_initial_def_for_reduction() for details.  */
2646       switch (code)
2647         {
2648           case WIDEN_SUM_EXPR:
2649           case DOT_PROD_EXPR:
2650           case SAD_EXPR:
2651           case PLUS_EXPR:
2652           case MINUS_EXPR:
2653           case BIT_IOR_EXPR:
2654           case BIT_XOR_EXPR:
2655              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2656                neutral_op = build_real (TREE_TYPE (op), dconst0);
2657              else
2658                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2659
2660              break;
2661
2662           case MULT_EXPR:
2663              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2664                neutral_op = build_real (TREE_TYPE (op), dconst1);
2665              else
2666                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2667
2668              break;
2669
2670           case BIT_AND_EXPR:
2671             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2672             break;
2673
2674           /* For MIN/MAX we don't have an easy neutral operand but
2675              the initial values can be used fine here.  Only for
2676              a reduction chain we have to force a neutral element.  */
2677           case MAX_EXPR:
2678           case MIN_EXPR:
2679             if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2680               neutral_op = NULL;
2681             else
2682               {
2683                 def_stmt = SSA_NAME_DEF_STMT (op);
2684                 loop = (gimple_bb (stmt))->loop_father;
2685                 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2686                                                     loop_preheader_edge (loop));
2687               }
2688             break;
2689
2690           default:
2691             gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2692             neutral_op = NULL;
2693         }
2694     }
2695
2696   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2697     {
2698       is_store = true;
2699       op = gimple_assign_rhs1 (stmt);
2700     }
2701   else
2702     is_store = false;
2703
2704   gcc_assert (op);
2705
2706   if (CONSTANT_CLASS_P (op))
2707     constant_p = true;
2708   else
2709     constant_p = false;
2710
2711   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2712      created vectors. It is greater than 1 if unrolling is performed.
2713
2714      For example, we have two scalar operands, s1 and s2 (e.g., group of
2715      strided accesses of size two), while NUNITS is four (i.e., four scalars
2716      of this type can be packed in a vector).  The output vector will contain
2717      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2718      will be 2).
2719
2720      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2721      containing the operands.
2722
2723      For example, NUNITS is four as before, and the group size is 8
2724      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2725      {s5, s6, s7, s8}.  */
2726
2727   number_of_copies = nunits * number_of_vectors / group_size;
2728
2729   number_of_places_left_in_vector = nunits;
2730   elts = XALLOCAVEC (tree, nunits);
2731   bool place_after_defs = false;
2732   for (j = 0; j < number_of_copies; j++)
2733     {
2734       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2735         {
2736           if (is_store)
2737             op = gimple_assign_rhs1 (stmt);
2738           else
2739             {
2740               switch (code)
2741                 {
2742                   case COND_EXPR:
2743                     if (op_num == 0 || op_num == 1)
2744                       {
2745                         tree cond = gimple_assign_rhs1 (stmt);
2746                         op = TREE_OPERAND (cond, op_num);
2747                       }
2748                     else
2749                       {
2750                         if (op_num == 2)
2751                           op = gimple_assign_rhs2 (stmt);
2752                         else
2753                           op = gimple_assign_rhs3 (stmt);
2754                       }
2755                     break;
2756
2757                   case CALL_EXPR:
2758                     op = gimple_call_arg (stmt, op_num);
2759                     break;
2760
2761                   case LSHIFT_EXPR:
2762                   case RSHIFT_EXPR:
2763                   case LROTATE_EXPR:
2764                   case RROTATE_EXPR:
2765                     op = gimple_op (stmt, op_num + 1);
2766                     /* Unlike the other binary operators, shifts/rotates have
2767                        the shift count being int, instead of the same type as
2768                        the lhs, so make sure the scalar is the right type if
2769                        we are dealing with vectors of
2770                        long long/long/short/char.  */
2771                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2772                       op = fold_convert (TREE_TYPE (vector_type), op);
2773                     break;
2774
2775                   default:
2776                     op = gimple_op (stmt, op_num + 1);
2777                     break;
2778                 }
2779             }
2780
2781           if (reduc_index != -1)
2782             {
2783               loop = (gimple_bb (stmt))->loop_father;
2784               def_stmt = SSA_NAME_DEF_STMT (op);
2785
2786               gcc_assert (loop);
2787
2788               /* Get the def before the loop.  In reduction chain we have only
2789                  one initial value.  */
2790               if ((j != (number_of_copies - 1)
2791                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2792                        && i != 0))
2793                   && neutral_op)
2794                 op = neutral_op;
2795               else
2796                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2797                                             loop_preheader_edge (loop));
2798             }
2799
2800           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2801           number_of_places_left_in_vector--;
2802           tree orig_op = op;
2803           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2804             {
2805               if (CONSTANT_CLASS_P (op))
2806                 {
2807                   op = fold_unary (VIEW_CONVERT_EXPR,
2808                                    TREE_TYPE (vector_type), op);
2809                   gcc_assert (op && CONSTANT_CLASS_P (op));
2810                 }
2811               else
2812                 {
2813                   tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
2814                   gimple init_stmt;
2815                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
2816                   init_stmt
2817                     = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
2818                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2819                   op = new_temp;
2820                 }
2821             }
2822           elts[number_of_places_left_in_vector] = op;
2823           if (!CONSTANT_CLASS_P (op))
2824             constant_p = false;
2825           if (TREE_CODE (orig_op) == SSA_NAME
2826               && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2827               && STMT_VINFO_BB_VINFO (stmt_vinfo)
2828               && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2829                   == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2830             place_after_defs = true;
2831
2832           if (number_of_places_left_in_vector == 0)
2833             {
2834               number_of_places_left_in_vector = nunits;
2835
2836               if (constant_p)
2837                 vec_cst = build_vector (vector_type, elts);
2838               else
2839                 {
2840                   vec<constructor_elt, va_gc> *v;
2841                   unsigned k;
2842                   vec_alloc (v, nunits);
2843                   for (k = 0; k < nunits; ++k)
2844                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2845                   vec_cst = build_constructor (vector_type, v);
2846                 }
2847               tree init;
2848               gimple_stmt_iterator gsi;
2849               if (place_after_defs)
2850                 {
2851                   gsi = gsi_for_stmt
2852                           (vect_find_last_scalar_stmt_in_slp (slp_node));
2853                   init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2854                 }
2855               else
2856                 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
2857               if (ctor_seq != NULL)
2858                 {
2859                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
2860                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2861                                                         GSI_SAME_STMT);
2862                   ctor_seq = NULL;
2863                 }
2864               voprnds.quick_push (init);
2865               place_after_defs = false;
2866             }
2867         }
2868     }
2869
2870   /* Since the vectors are created in the reverse order, we should invert
2871      them.  */
2872   vec_num = voprnds.length ();
2873   for (j = vec_num; j != 0; j--)
2874     {
2875       vop = voprnds[j - 1];
2876       vec_oprnds->quick_push (vop);
2877     }
2878
2879   voprnds.release ();
2880
2881   /* In case that VF is greater than the unrolling factor needed for the SLP
2882      group of stmts, NUMBER_OF_VECTORS to be created is greater than
2883      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2884      to replicate the vectors.  */
2885   while (number_of_vectors > vec_oprnds->length ())
2886     {
2887       tree neutral_vec = NULL;
2888
2889       if (neutral_op)
2890         {
2891           if (!neutral_vec)
2892             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2893
2894           vec_oprnds->quick_push (neutral_vec);
2895         }
2896       else
2897         {
2898           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2899             vec_oprnds->quick_push (vop);
2900         }
2901     }
2902 }
2903
2904
2905 /* Get vectorized definitions from SLP_NODE that contains corresponding
2906    vectorized def-stmts.  */
2907
2908 static void
2909 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2910 {
2911   tree vec_oprnd;
2912   gimple vec_def_stmt;
2913   unsigned int i;
2914
2915   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2916
2917   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2918     {
2919       gcc_assert (vec_def_stmt);
2920       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2921       vec_oprnds->quick_push (vec_oprnd);
2922     }
2923 }
2924
2925
2926 /* Get vectorized definitions for SLP_NODE.
2927    If the scalar definitions are loop invariants or constants, collect them and
2928    call vect_get_constant_vectors() to create vector stmts.
2929    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2930    must be stored in the corresponding child of SLP_NODE, and we call
2931    vect_get_slp_vect_defs () to retrieve them.  */
2932
2933 void
2934 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2935                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2936 {
2937   gimple first_stmt;
2938   int number_of_vects = 0, i;
2939   unsigned int child_index = 0;
2940   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2941   slp_tree child = NULL;
2942   vec<tree> vec_defs;
2943   tree oprnd;
2944   bool vectorized_defs;
2945
2946   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2947   FOR_EACH_VEC_ELT (ops, i, oprnd)
2948     {
2949       /* For each operand we check if it has vectorized definitions in a child
2950          node or we need to create them (for invariants and constants).  We
2951          check if the LHS of the first stmt of the next child matches OPRND.
2952          If it does, we found the correct child.  Otherwise, we call
2953          vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2954          to check this child node for the next operand.  */
2955       vectorized_defs = false;
2956       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2957         {
2958           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2959
2960           /* We have to check both pattern and original def, if available.  */
2961           if (child)
2962             {
2963               gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2964               gimple related
2965                 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2966
2967               if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2968                   || (related
2969                       && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2970                 {
2971                   /* The number of vector defs is determined by the number of
2972                      vector statements in the node from which we get those
2973                      statements.  */
2974                   number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2975                   vectorized_defs = true;
2976                   child_index++;
2977                 }
2978             }
2979           else
2980             child_index++;
2981         }
2982
2983       if (!vectorized_defs)
2984         {
2985           if (i == 0)
2986             {
2987               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2988               /* Number of vector stmts was calculated according to LHS in
2989                  vect_schedule_slp_instance (), fix it by replacing LHS with
2990                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2991                  details.  */
2992               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2993                                              &rhs_size_unit);
2994               if (rhs_size_unit != lhs_size_unit)
2995                 {
2996                   number_of_vects *= rhs_size_unit;
2997                   number_of_vects /= lhs_size_unit;
2998                 }
2999             }
3000         }
3001
3002       /* Allocate memory for vectorized defs.  */
3003       vec_defs = vNULL;
3004       vec_defs.create (number_of_vects);
3005
3006       /* For reduction defs we call vect_get_constant_vectors (), since we are
3007          looking for initial loop invariant values.  */
3008       if (vectorized_defs && reduc_index == -1)
3009         /* The defs are already vectorized.  */
3010         vect_get_slp_vect_defs (child, &vec_defs);
3011       else
3012         /* Build vectors from scalar defs.  */
3013         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3014                                    number_of_vects, reduc_index);
3015
3016       vec_oprnds->quick_push (vec_defs);
3017
3018       /* For reductions, we only need initial values.  */
3019       if (reduc_index != -1)
3020         return;
3021     }
3022 }
3023
3024
3025 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
3026    building a vector of type MASK_TYPE from it) and two input vectors placed in
3027    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
3028    shifting by STRIDE elements of DR_CHAIN for every copy.
3029    (STRIDE is the number of vectorized stmts for NODE divided by the number of
3030    copies).
3031    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
3032    the created stmts must be inserted.  */
3033
3034 static inline void
3035 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
3036                            tree mask, int first_vec_indx, int second_vec_indx,
3037                            gimple_stmt_iterator *gsi, slp_tree node,
3038                            tree vectype, vec<tree> dr_chain,
3039                            int ncopies, int vect_stmts_counter)
3040 {
3041   tree perm_dest;
3042   gimple perm_stmt = NULL;
3043   stmt_vec_info next_stmt_info;
3044   int i, stride;
3045   tree first_vec, second_vec, data_ref;
3046
3047   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3048
3049   /* Initialize the vect stmts of NODE to properly insert the generated
3050      stmts later.  */
3051   for (i = SLP_TREE_VEC_STMTS (node).length ();
3052        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3053     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3054
3055   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3056   for (i = 0; i < ncopies; i++)
3057     {
3058       first_vec = dr_chain[first_vec_indx];
3059       second_vec = dr_chain[second_vec_indx];
3060
3061       /* Generate the permute statement.  */
3062       perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3063                                        first_vec, second_vec, mask);
3064       data_ref = make_ssa_name (perm_dest, perm_stmt);
3065       gimple_set_lhs (perm_stmt, data_ref);
3066       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3067
3068       /* Store the vector statement in NODE.  */
3069       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
3070
3071       first_vec_indx += stride;
3072       second_vec_indx += stride;
3073     }
3074
3075   /* Mark the scalar stmt as vectorized.  */
3076   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
3077   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
3078 }
3079
3080
3081 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
3082    return in CURRENT_MASK_ELEMENT its equivalent in target specific
3083    representation.  Check that the mask is valid and return FALSE if not.
3084    Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
3085    the next vector, i.e., the current first vector is not needed.  */
3086
3087 static bool
3088 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
3089                        int mask_nunits, bool only_one_vec, int index,
3090                        unsigned char *mask, int *current_mask_element,
3091                        bool *need_next_vector, int *number_of_mask_fixes,
3092                        bool *mask_fixed, bool *needs_first_vector)
3093 {
3094   int i;
3095
3096   /* Convert to target specific representation.  */
3097   *current_mask_element = first_mask_element + m;
3098   /* Adjust the value in case it's a mask for second and third vectors.  */
3099   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
3100
3101   if (*current_mask_element < 0)
3102     {
3103       if (dump_enabled_p ())
3104         {
3105           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3106                            "permutation requires past vector ");
3107           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3108           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3109         }
3110       return false;
3111     }
3112
3113   if (*current_mask_element < mask_nunits)
3114     *needs_first_vector = true;
3115
3116   /* We have only one input vector to permute but the mask accesses values in
3117      the next vector as well.  */
3118   if (only_one_vec && *current_mask_element >= mask_nunits)
3119     {
3120       if (dump_enabled_p ())
3121         {
3122           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3123                            "permutation requires at least two vectors ");
3124           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3125           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3126         }
3127
3128       return false;
3129     }
3130
3131   /* The mask requires the next vector.  */
3132   while (*current_mask_element >= mask_nunits * 2)
3133     {
3134       if (*needs_first_vector || *mask_fixed)
3135         {
3136           /* We either need the first vector too or have already moved to the
3137              next vector. In both cases, this permutation needs three
3138              vectors.  */
3139           if (dump_enabled_p ())
3140             {
3141               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3142                                "permutation requires at "
3143                                "least three vectors ");
3144               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3145               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3146             }
3147
3148           return false;
3149         }
3150
3151       /* We move to the next vector, dropping the first one and working with
3152          the second and the third - we need to adjust the values of the mask
3153          accordingly.  */
3154       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
3155
3156       for (i = 0; i < index; i++)
3157         mask[i] -= mask_nunits * *number_of_mask_fixes;
3158
3159       (*number_of_mask_fixes)++;
3160       *mask_fixed = true;
3161     }
3162
3163   *need_next_vector = *mask_fixed;
3164
3165   /* This was the last element of this mask. Start a new one.  */
3166   if (index == mask_nunits - 1)
3167     {
3168       *number_of_mask_fixes = 1;
3169       *mask_fixed = false;
3170       *needs_first_vector = false;
3171     }
3172
3173   return true;
3174 }
3175
3176
3177 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3178    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3179    permute statements for the SLP node NODE of the SLP instance
3180    SLP_NODE_INSTANCE.  */
3181
3182 bool
3183 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3184                               gimple_stmt_iterator *gsi, int vf,
3185                               slp_instance slp_node_instance, bool analyze_only)
3186 {
3187   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3188   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3189   tree mask_element_type = NULL_TREE, mask_type;
3190   int i, j, k, nunits, vec_index = 0, scalar_index;
3191   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3192   gimple next_scalar_stmt;
3193   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3194   int first_mask_element;
3195   int index, unroll_factor, current_mask_element, ncopies;
3196   unsigned char *mask;
3197   bool only_one_vec = false, need_next_vector = false;
3198   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
3199   int number_of_mask_fixes = 1;
3200   bool mask_fixed = false;
3201   bool needs_first_vector = false;
3202   machine_mode mode;
3203
3204   mode = TYPE_MODE (vectype);
3205
3206   if (!can_vec_perm_p (mode, false, NULL))
3207     {
3208       if (dump_enabled_p ())
3209         {
3210           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3211                            "no vect permute for ");
3212           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3213           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3214         }
3215       return false;
3216     }
3217
3218   /* The generic VEC_PERM_EXPR code always uses an integral type of the
3219      same size as the vector element being permuted.  */
3220   mask_element_type = lang_hooks.types.type_for_mode
3221                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3222   mask_type = get_vectype_for_scalar_type (mask_element_type);
3223   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3224   mask = XALLOCAVEC (unsigned char, nunits);
3225   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3226
3227   /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
3228      unrolling factor.  */
3229   orig_vec_stmts_num = group_size *
3230                 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
3231   if (orig_vec_stmts_num == 1)
3232     only_one_vec = true;
3233
3234   /* Number of copies is determined by the final vectorization factor
3235      relatively to SLP_NODE_INSTANCE unrolling factor.  */
3236   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3237
3238   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3239     return false;
3240
3241   stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3242
3243   /* Generate permutation masks for every NODE. Number of masks for each NODE
3244      is equal to GROUP_SIZE.
3245      E.g., we have a group of three nodes with three loads from the same
3246      location in each node, and the vector size is 4. I.e., we have a
3247      a0b0c0a1b1c1... sequence and we need to create the following vectors:
3248      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3249      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3250      ...
3251
3252      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3253      The last mask is illegal since we assume two operands for permute
3254      operation, and the mask element values can't be outside that range.
3255      Hence, the last mask must be converted into {2,5,5,5}.
3256      For the first two permutations we need the first and the second input
3257      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3258      we need the second and the third vectors: {b1,c1,a2,b2} and
3259      {c2,a3,b3,c3}.  */
3260
3261   {
3262       scalar_index = 0;
3263       index = 0;
3264       vect_stmts_counter = 0;
3265       vec_index = 0;
3266       first_vec_index = vec_index++;
3267       if (only_one_vec)
3268         second_vec_index = first_vec_index;
3269       else
3270         second_vec_index =  vec_index++;
3271
3272       for (j = 0; j < unroll_factor; j++)
3273         {
3274           for (k = 0; k < group_size; k++)
3275             {
3276               i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3277               first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
3278               if (!vect_get_mask_element (stmt, first_mask_element, 0,
3279                                           nunits, only_one_vec, index,
3280                                           mask, &current_mask_element,
3281                                           &need_next_vector,
3282                                           &number_of_mask_fixes, &mask_fixed,
3283                                           &needs_first_vector))
3284                 return false;
3285               gcc_assert (current_mask_element >= 0
3286                           && current_mask_element < 2 * nunits);
3287               mask[index++] = current_mask_element;
3288
3289               if (index == nunits)
3290                 {
3291                   index = 0;
3292                   if (!can_vec_perm_p (mode, false, mask))
3293                     {
3294                       if (dump_enabled_p ())
3295                         {
3296                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3297                                            vect_location, 
3298                                            "unsupported vect permute { ");
3299                           for (i = 0; i < nunits; ++i)
3300                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3301                                          mask[i]);
3302                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3303                         }
3304                       return false;
3305                     }
3306
3307                   if (!analyze_only)
3308                     {
3309                       int l;
3310                       tree mask_vec, *mask_elts;
3311                       mask_elts = XALLOCAVEC (tree, nunits);
3312                       for (l = 0; l < nunits; ++l)
3313                         mask_elts[l] = build_int_cst (mask_element_type,
3314                                                       mask[l]);
3315                       mask_vec = build_vector (mask_type, mask_elts);
3316
3317                       if (need_next_vector)
3318                         {
3319                           first_vec_index = second_vec_index;
3320                           second_vec_index = vec_index;
3321                         }
3322
3323                       next_scalar_stmt
3324                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3325
3326                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
3327                                mask_vec, first_vec_index, second_vec_index,
3328                                gsi, node, vectype, dr_chain,
3329                                ncopies, vect_stmts_counter++);
3330                     }
3331                 }
3332             }
3333         }
3334     }
3335
3336   return true;
3337 }
3338
3339
3340
3341 /* Vectorize SLP instance tree in postorder.  */
3342
3343 static bool
3344 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3345                             unsigned int vectorization_factor)
3346 {
3347   gimple stmt;
3348   bool grouped_store, is_store;
3349   gimple_stmt_iterator si;
3350   stmt_vec_info stmt_info;
3351   unsigned int vec_stmts_size, nunits, group_size;
3352   tree vectype;
3353   int i;
3354   slp_tree child;
3355
3356   if (!node)
3357     return false;
3358
3359   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3360     vect_schedule_slp_instance (child, instance, vectorization_factor);
3361
3362   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3363   stmt_info = vinfo_for_stmt (stmt);
3364
3365   /* VECTYPE is the type of the destination.  */
3366   vectype = STMT_VINFO_VECTYPE (stmt_info);
3367   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3368   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3369
3370   /* For each SLP instance calculate number of vector stmts to be created
3371      for the scalar stmts in each node of the SLP tree.  Number of vector
3372      elements in one vector iteration is the number of scalar elements in
3373      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3374      size.
3375      Unless this is a SLP reduction in which case the number of vector
3376      stmts is equal to the number of vector stmts of the children.  */
3377   if (GROUP_FIRST_ELEMENT (stmt_info)
3378       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3379     vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3380   else
3381     vec_stmts_size = (vectorization_factor * group_size) / nunits;
3382
3383   if (!SLP_TREE_VEC_STMTS (node).exists ())
3384     {
3385       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3386       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3387     }
3388
3389   if (dump_enabled_p ())
3390     {
3391       dump_printf_loc (MSG_NOTE,vect_location,
3392                        "------>vectorizing SLP node starting from: ");
3393       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3394       dump_printf (MSG_NOTE, "\n");
3395     }
3396
3397   /* Vectorized stmts go before the last scalar stmt which is where
3398      all uses are ready.  */
3399   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3400
3401   /* Mark the first element of the reduction chain as reduction to properly
3402      transform the node.  In the analysis phase only the last element of the
3403      chain is marked as reduction.  */
3404   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3405       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3406     {
3407       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3408       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3409     }
3410
3411   /* Handle two-operation SLP nodes by vectorizing the group with
3412      both operations and then performing a merge.  */
3413   if (SLP_TREE_TWO_OPERATORS (node))
3414     {
3415       enum tree_code code0 = gimple_assign_rhs_code (stmt);
3416       enum tree_code ocode;
3417       gimple ostmt;
3418       unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3419       bool allsame = true;
3420       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3421         if (gimple_assign_rhs_code (ostmt) != code0)
3422           {
3423             mask[i] = 1;
3424             allsame = false;
3425             ocode = gimple_assign_rhs_code (ostmt);
3426           }
3427         else
3428           mask[i] = 0;
3429       if (!allsame)
3430         {
3431           vec<gimple> v0;
3432           vec<gimple> v1;
3433           unsigned j;
3434           tree tmask = NULL_TREE;
3435           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3436           v0 = SLP_TREE_VEC_STMTS (node).copy ();
3437           SLP_TREE_VEC_STMTS (node).truncate (0);
3438           gimple_assign_set_rhs_code (stmt, ocode);
3439           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3440           gimple_assign_set_rhs_code (stmt, code0);
3441           v1 = SLP_TREE_VEC_STMTS (node).copy ();
3442           SLP_TREE_VEC_STMTS (node).truncate (0);
3443           tree meltype = build_nonstandard_integer_type
3444               (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3445           tree mvectype = get_same_sized_vectype (meltype, vectype);
3446           unsigned k = 0, l;
3447           for (j = 0; j < v0.length (); ++j)
3448             {
3449               tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3450               for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3451                 {
3452                   if (k >= group_size)
3453                     k = 0;
3454                   melts[l] = build_int_cst
3455                       (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3456                 }
3457               tmask = build_vector (mvectype, melts);
3458
3459               /* ???  Not all targets support a VEC_PERM_EXPR with a
3460                  constant mask that would translate to a vec_merge RTX
3461                  (with their vec_perm_const_ok).  We can either not
3462                  vectorize in that case or let veclower do its job.
3463                  Unfortunately that isn't too great and at least for
3464                  plus/minus we'd eventually like to match targets
3465                  vector addsub instructions.  */
3466               gimple vstmt;
3467               vstmt = gimple_build_assign (make_ssa_name (vectype),
3468                                            VEC_PERM_EXPR,
3469                                            gimple_assign_lhs (v0[j]),
3470                                            gimple_assign_lhs (v1[j]), tmask);
3471               vect_finish_stmt_generation (stmt, vstmt, &si);
3472               SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3473             }
3474           v0.release ();
3475           v1.release ();
3476           return false;
3477         }
3478     }
3479   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3480   return is_store;
3481 }
3482
3483 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3484    For loop vectorization this is done in vectorizable_call, but for SLP
3485    it needs to be deferred until end of vect_schedule_slp, because multiple
3486    SLP instances may refer to the same scalar stmt.  */
3487
3488 static void
3489 vect_remove_slp_scalar_calls (slp_tree node)
3490 {
3491   gimple stmt, new_stmt;
3492   gimple_stmt_iterator gsi;
3493   int i;
3494   slp_tree child;
3495   tree lhs;
3496   stmt_vec_info stmt_info;
3497
3498   if (!node)
3499     return;
3500
3501   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3502     vect_remove_slp_scalar_calls (child);
3503
3504   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3505     {
3506       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3507         continue;
3508       stmt_info = vinfo_for_stmt (stmt);
3509       if (stmt_info == NULL
3510           || is_pattern_stmt_p (stmt_info)
3511           || !PURE_SLP_STMT (stmt_info))
3512         continue;
3513       lhs = gimple_call_lhs (stmt);
3514       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3515       set_vinfo_for_stmt (new_stmt, stmt_info);
3516       set_vinfo_for_stmt (stmt, NULL);
3517       STMT_VINFO_STMT (stmt_info) = new_stmt;
3518       gsi = gsi_for_stmt (stmt);
3519       gsi_replace (&gsi, new_stmt, false);
3520       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3521     }
3522 }
3523
3524 /* Generate vector code for all SLP instances in the loop/basic block.  */
3525
3526 bool
3527 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3528 {
3529   vec<slp_instance> slp_instances;
3530   slp_instance instance;
3531   unsigned int i, vf;
3532   bool is_store = false;
3533
3534   if (loop_vinfo)
3535     {
3536       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3537       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3538     }
3539   else
3540     {
3541       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3542       vf = 1;
3543     }
3544
3545   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3546     {
3547       /* Schedule the tree of INSTANCE.  */
3548       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3549                                              instance, vf);
3550       if (dump_enabled_p ())
3551         dump_printf_loc (MSG_NOTE, vect_location,
3552                          "vectorizing stmts using SLP.\n");
3553     }
3554
3555   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3556     {
3557       slp_tree root = SLP_INSTANCE_TREE (instance);
3558       gimple store;
3559       unsigned int j;
3560       gimple_stmt_iterator gsi;
3561
3562       /* Remove scalar call stmts.  Do not do this for basic-block
3563          vectorization as not all uses may be vectorized.
3564          ???  Why should this be necessary?  DCE should be able to
3565          remove the stmts itself.
3566          ???  For BB vectorization we can as well remove scalar
3567          stmts starting from the SLP tree root if they have no
3568          uses.  */
3569       if (loop_vinfo)
3570         vect_remove_slp_scalar_calls (root);
3571
3572       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3573                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3574         {
3575           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3576             break;
3577
3578          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3579            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3580           /* Free the attached stmt_vec_info and remove the stmt.  */
3581           gsi = gsi_for_stmt (store);
3582           unlink_stmt_vdef (store);
3583           gsi_remove (&gsi, true);
3584           release_defs (store);
3585           free_stmt_vec_info (store);
3586         }
3587     }
3588
3589   return is_store;
3590 }
3591
3592
3593 /* Vectorize the basic block.  */
3594
3595 void
3596 vect_slp_transform_bb (basic_block bb)
3597 {
3598   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3599   gimple_stmt_iterator si;
3600
3601   gcc_assert (bb_vinfo);
3602
3603   if (dump_enabled_p ())
3604     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3605
3606   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3607     {
3608       gimple stmt = gsi_stmt (si);
3609       stmt_vec_info stmt_info;
3610
3611       if (dump_enabled_p ())
3612         {
3613           dump_printf_loc (MSG_NOTE, vect_location,
3614                            "------>SLPing statement: ");
3615           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3616           dump_printf (MSG_NOTE, "\n");
3617         }
3618
3619       stmt_info = vinfo_for_stmt (stmt);
3620       gcc_assert (stmt_info);
3621
3622       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3623       if (STMT_SLP_TYPE (stmt_info))
3624         {
3625           vect_schedule_slp (NULL, bb_vinfo);
3626           break;
3627         }
3628     }
3629
3630   if (dump_enabled_p ())
3631     dump_printf_loc (MSG_NOTE, vect_location,
3632                      "BASIC BLOCK VECTORIZED\n");
3633
3634   destroy_bb_vec_info (bb_vinfo);
3635 }