/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if one is found and UNKNOWN_LOCATION
   otherwise.  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;

  return node;
}
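
/* Worked example for the COND_EXPR adjustment above (hypothetical stmt,
   for illustration only): for x_1 = a_2 < b_3 ? c_4 : d_5,
   gimple_num_ops is 4, so NOPS starts out as 3 (the comparison tree,
   c_4 and d_5); the increment makes it 4 so that the two operands of
   the embedded comparison (a_2 and b_3) each get their own child slot,
   matching the COND_EXPR handling in vect_get_and_check_slp_defs.  */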


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  Return -1 on a fatal error, 1 if
   the error could be corrected by swapping operands of the operation, and 0
   if everything is OK.  */

static int
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (code == COND_EXPR)
        {
          first_op_cond = true;
          commutative = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          if (i == 0 || i == 1)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
                                  swapped ? !i : i);
          else
            oprnd = gimple_op (stmt, first_op_idx + i - 1);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt:\n");
                return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}
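
/* A hypothetical example of the operand swapping above: for the group

       x_1 = a_2 + p_3;
       y_4 = p_5 + b_6;

   assume the a_*/b_* defs are vect_internal_def and the p_* defs
   vect_external_def.  The first stmt records (internal, external) as
   the expected def types of the two operand slots; for the second stmt
   slot zero then sees an external def, so because PLUS_EXPR is
   commutative the walk restarts with SWAPPED set, the defs now match,
   and at the end swap_ssa_operands commits the new operand order
   y_4 = b_6 + p_5 to the IL.  */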


/* Check whether the scalar stmts STMTS are isomorphic and of supported
   types of operation, and whether they require a data permutation.
   Return true if they are suitable for SLP, otherwise return false and
   indicate in *MATCHES which stmts are not isomorphic to the first one.
   If MATCHES[0] is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  gimple first_stmt = stmts[0], stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
      if (bb_vinfo
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                        && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                       /* If the group is split up then GROUP_GAP
                          isn't correct here, nor is GROUP_FIRST_ELEMENT.  */
                       || GROUP_SIZE (vinfo_for_stmt (stmt)) > group_size))
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
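
/* Worked example for the two-operator mask above (hypothetical group,
   for illustration only): with V4SI vectors (TYPE_VECTOR_SUBPARTS == 4)
   and GROUP_SIZE == 2 for the group { a = b + c;  d = e - f; }, the
   MINUS_EXPR stmts occupy the odd positions, so the mask becomes
   { 0, 1 + 4, 2, 3 + 4 } = { 0, 5, 2, 7 }: even lanes are taken from
   the vector computed with PLUS_EXPR, odd lanes from the one computed
   with MINUS_EXPR, which is the blend can_vec_perm_p is asked to
   verify.  */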

/* Recursively build an SLP tree starting from NODE.
   Fail (and return false) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise
   return true.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  gimple stmt;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  bool two_operators = false;
  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches,
                              &two_operators))
    return false;
  SLP_TREE_TWO_OPERATORS (*node) = two_operators;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                           stmt, i, &oprnds_info))
        {
        case 0:
          break;
        case -1:
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return false;
        case 1:
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return false;
      }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches,
                               npermutes, &this_tree_size, max_tree_size))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ???  This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (bb_vinfo
          && !matches[0]
          /* ???  Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          SLP_TREE_CHILDREN (*node).quick_push (NULL);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && !SLP_TREE_TWO_OPERATORS (*node)
          /* Do so only if the number of unsuccessful permutes was not
             more than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   tem, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  */
              for (j = 0; j < group_size; ++j)
                if (!matches[j])
                  {
                    gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                  }
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}
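
/* For illustration (hypothetical basic-block example), given the
   scalar code

       t1_1 = b[0];  t2_2 = c[0];  s_3 = t1_1 + t2_2;  a[0] = s_3;
       t1_4 = b[1];  t2_5 = c[1];  s_6 = t1_4 + t2_5;  a[1] = s_6;

   and a root node holding the two stores, the recursion above adds a
   single child node for the PLUS_EXPR defs; that child in turn gets
   two children, the grouped-load nodes for b[] and c[], at which the
   recursion terminates (see the load check above).  */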

/* Dump an SLP tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
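
/* Note the direction of the permutation applied above: the stmt at
   position I moves to position PERMUTATION[I].  Hypothetical example:
   with PERMUTATION = { 2, 0, 1 } the stmts { s0, s1, s2 } are
   rearranged into { s1, s2, s0 }.  */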


/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      return true;
    }

1481   /* FORNOW: the only supported permutation is 0..01..1.., of length equal
1482      to GROUP_SIZE, where each sequence of accesses to the same dr is also
1483      of GROUP_SIZE length (unless this is a reduction).  */
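  /* E.g. (hypothetical instance): with GROUP_SIZE 2 and two load nodes
     that each read a single dr twice, the constant permutations {0, 0}
     and {1, 1} pass the checks below.  */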
1484   if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
1485     return false;
1486   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1487     if (!node->load_permutation.exists ())
1488       return false;
1489
1490   load_index = sbitmap_alloc (group_size);
1491   bitmap_clear (load_index);
1492   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1493     {
1494       unsigned int lidx = node->load_permutation[0];
1495       if (bitmap_bit_p (load_index, lidx))
1496         {
1497           sbitmap_free (load_index);
1498           return false;
1499         }
1500       bitmap_set_bit (load_index, lidx);
1501       FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1502         if (k != lidx)
1503           {
1504             sbitmap_free (load_index);
1505             return false;
1506           }
1507     }
1508   for (i = 0; i < group_size; i++)
1509     if (!bitmap_bit_p (load_index, i))
1510       {
1511         sbitmap_free (load_index);
1512         return false;
1513       }
1514   sbitmap_free (load_index);
1515
1516   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1517     if (node->load_permutation.exists ()
1518         && !vect_transform_slp_perm_load
1519               (node, vNULL, NULL,
1520                SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1521       return false;
1522   return true;
1523 }
1524
1525
1526 /* Find the last scalar stmt in the SLP node NODE.  */
1527
1528 static gimple
1529 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1530 {
1531   gimple last = NULL, stmt;
1532
1533   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1534     {
1535       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1536       if (is_pattern_stmt_p (stmt_vinfo))
1537         last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1538       else
1539         last = get_later_stmt (stmt, last);
1540     }
1541
1542   return last;
1543 }
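/* vect_find_last_scalar_stmt_in_slp is used, for instance, by
   vect_get_constant_vectors below to place generated vector constructors
   after the last scalar def they depend on.  */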
1544
1545 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1546
1547 static void
1548 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1549                          stmt_vector_for_cost *prologue_cost_vec,
1550                          stmt_vector_for_cost *body_cost_vec,
1551                          unsigned ncopies_for_cost)
1552 {
1553   unsigned i;
1554   slp_tree child;
1555   gimple stmt, s;
1556   stmt_vec_info stmt_info;
1557   tree lhs;
1558   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1559
1560   /* Recurse down the SLP tree.  */
1561   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1562     if (child)
1563       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1564                                body_cost_vec, ncopies_for_cost);
1565
1566   /* Look at the first scalar stmt to determine the cost.  */
1567   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1568   stmt_info = vinfo_for_stmt (stmt);
1569   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1570     {
1571       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1572         vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1573                                vect_uninitialized_def,
1574                                node, prologue_cost_vec, body_cost_vec);
1575       else
1576         {
1577           int i;
1578           gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1579           vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1580                                 node, prologue_cost_vec, body_cost_vec);
1581           /* If the load is permuted record the cost for the permutation.
1582              ???  Loads from multiple chains are let through here only
1583              for a single special case involving complex numbers where
1584              in the end no permutation is necessary.  */
1585           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1586             if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1587                  == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1588                 && vect_get_place_in_interleaving_chain
1589                      (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1590               {
1591                 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1592                                   stmt_info, 0, vect_body);
1593                 break;
1594               }
1595         }
1596     }
1597   else
1598     {
1599       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1600                         stmt_info, 0, vect_body);
1601       if (SLP_TREE_TWO_OPERATORS (node))
1602         {
1603           record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1604                             stmt_info, 0, vect_body);
1605           record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1606                             stmt_info, 0, vect_body);
1607         }
1608     }
1609
1610   /* Scan operands and account for prologue cost of constants/externals.
1611      ???  This over-estimates cost for multiple uses and should be
1612      re-engineered.  */
1613   lhs = gimple_get_lhs (stmt);
1614   for (i = 0; i < gimple_num_ops (stmt); ++i)
1615     {
1616       tree def, op = gimple_op (stmt, i);
1617       gimple def_stmt;
1618       enum vect_def_type dt;
1619       if (!op || op == lhs)
1620         continue;
1621       if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
1622                               STMT_VINFO_BB_VINFO (stmt_info),
1623                               &def_stmt, &def, &dt))
1624         {
1625           /* Without looking at the actual initializer a vector of
1626              constants can be implemented as load from the constant pool.
1627              ???  We need to pass down stmt_info for a vector type
1628              even if it points to the wrong stmt.  */
1629           if (dt == vect_constant_def)
1630             record_stmt_cost (prologue_cost_vec, 1, vector_load,
1631                               stmt_info, 0, vect_prologue);
1632           else if (dt == vect_external_def)
1633             record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1634                               stmt_info, 0, vect_prologue);
1635         }
1636     }
1637 }
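/* For illustration (hypothetical input to the above): costing a vector add
   whose second operand is loop-invariant records ncopies_for_cost
   vector_stmt entries in the body cost and one vec_construct entry in the
   prologue cost.  */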
1638
1639 /* Compute the cost for the SLP instance INSTANCE.  */
1640
1641 static void
1642 vect_analyze_slp_cost (slp_instance instance, void *data)
1643 {
1644   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1645   unsigned ncopies_for_cost;
1646   stmt_info_for_cost *si;
1647   unsigned i;
1648
1649   /* Calculate the number of vector stmts to create based on the unrolling
1650      factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1651      GROUP_SIZE / NUNITS otherwise).  */
1652   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1653   slp_tree node = SLP_INSTANCE_TREE (instance);
1654   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1655   /* Adjust the group_size by the vectorization factor which is always one
1656      for basic-block vectorization.  */
1657   if (STMT_VINFO_LOOP_VINFO (stmt_info))
1658     group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1659   unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1660   /* For reductions look at a reduction operand in case the reduction
1661      operation is widening like DOT_PROD or SAD.  */
1662   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1663     {
1664       gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1665       switch (gimple_assign_rhs_code (stmt))
1666         {
1667         case DOT_PROD_EXPR:
1668         case SAD_EXPR:
1669           nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1670                                 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1671           break;
1672         default:;
1673         }
1674     }
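  /* E.g. (made-up numbers): NUNITS 4 and GROUP_SIZE 6 give
     least_common_multiple (4, 6) / 4 = 3 copies for the cost model.  */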
1675   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1676
1677   prologue_cost_vec.create (10);
1678   body_cost_vec.create (10);
1679   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1680                            &prologue_cost_vec, &body_cost_vec,
1681                            ncopies_for_cost);
1682
1683   /* Record the prologue costs, which were delayed until we were
1684      sure that SLP was successful.  */
1685   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1686     {
1687       struct _stmt_vec_info *stmt_info
1688         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1689       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1690                             si->misalign, vect_prologue);
1691     }
1692
1693   /* Record the instance's instructions in the target cost model.  */
1694   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1695     {
1696       struct _stmt_vec_info *stmt_info
1697         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1698       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1699                             si->misalign, vect_body);
1700     }
1701
1702   prologue_cost_vec.release ();
1703   body_cost_vec.release ();
1704 }
1705
1706 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1707    vect_build_slp_tree to build a tree of packed stmts if possible.
1708    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1709
1710 static bool
1711 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1712                            gimple stmt, unsigned max_tree_size)
1713 {
1714   slp_instance new_instance;
1715   slp_tree node;
1716   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1717   unsigned int unrolling_factor = 1, nunits;
1718   tree vectype, scalar_type = NULL_TREE;
1719   gimple next;
1720   unsigned int vectorization_factor = 0;
1721   int i;
1722   unsigned int max_nunits = 0;
1723   vec<slp_tree> loads;
1724   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1725   vec<gimple> scalar_stmts;
1726
1727   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1728     {
1729       if (dr)
1730         {
1731           scalar_type = TREE_TYPE (DR_REF (dr));
1732           vectype = get_vectype_for_scalar_type (scalar_type);
1733         }
1734       else
1735         {
1736           gcc_assert (loop_vinfo);
1737           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1738         }
1739
1740       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1741     }
1742   else
1743     {
1744       gcc_assert (loop_vinfo);
1745       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1746       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1747     }
1748
1749   if (!vectype)
1750     {
1751       if (dump_enabled_p ())
1752         {
1753           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754                            "Build SLP failed: unsupported data-type ");
1755           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1756           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1757         }
1758
1759       return false;
1760     }
1761
1762   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1763   if (loop_vinfo)
1764     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1765   else
1766     vectorization_factor = nunits;
1767
1768   /* Calculate the unrolling factor.  */
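  /* E.g. (made-up numbers): NUNITS 4 and GROUP_SIZE 6 give an unrolling
     factor of least_common_multiple (4, 6) / 6 = 2.  */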
1769   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1770   if (unrolling_factor != 1 && !loop_vinfo)
1771     {
1772       if (dump_enabled_p ())
1773         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1774                          "Build SLP failed: unrolling required in basic"
1775                          " block SLP\n");
1776
1777       return false;
1778     }
1779
1780   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1781   scalar_stmts.create (group_size);
1782   next = stmt;
1783   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1784     {
1785       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1786       while (next)
1787         {
1788           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1789               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1790             scalar_stmts.safe_push (
1791                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1792           else
1793             scalar_stmts.safe_push (next);
1794           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1795         }
1796     }
1797   else
1798     {
1799       /* Collect reduction statements.  */
1800       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1801       for (i = 0; reductions.iterate (i, &next); i++)
1802         scalar_stmts.safe_push (next);
1803     }
1804
1805   node = vect_create_new_slp_node (scalar_stmts);
1806
1807   loads.create (group_size);
1808
1809   /* Build the tree for the SLP instance.  */
1810   bool *matches = XALLOCAVEC (bool, group_size);
1811   unsigned npermutes = 0;
1812   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1813                            &max_nunits, &loads,
1814                            vectorization_factor, matches, &npermutes, NULL,
1815                            max_tree_size))
1816     {
1817       /* Calculate the unrolling factor based on the smallest type.  */
1818       if (max_nunits > nunits)
1819         unrolling_factor = least_common_multiple (max_nunits, group_size)
1820                            / group_size;
1821
1822       if (unrolling_factor != 1 && !loop_vinfo)
1823         {
1824           if (dump_enabled_p ())
1825             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1826                              "Build SLP failed: unrolling required in basic"
1827                              " block SLP\n");
1828           vect_free_slp_tree (node);
1829           loads.release ();
1830           return false;
1831         }
1832
1833       /* Create a new SLP instance.  */
1834       new_instance = XNEW (struct _slp_instance);
1835       SLP_INSTANCE_TREE (new_instance) = node;
1836       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1837       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1838       SLP_INSTANCE_LOADS (new_instance) = loads;
1839
1840       /* Compute the load permutation.  */
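      /* For illustration (hypothetical lanes): if a node's scalar loads
         appear as b[i+1], b[i] relative to the chain starting at b[i],
         the load permutation recorded below is {1, 0}.  */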
1841       slp_tree load_node;
1842       bool loads_permuted = false;
1843       FOR_EACH_VEC_ELT (loads, i, load_node)
1844         {
1845           vec<unsigned> load_permutation;
1846           int j;
1847           gimple load, first_stmt;
1848           bool this_load_permuted = false;
1849           load_permutation.create (group_size);
1850           first_stmt = GROUP_FIRST_ELEMENT
1851               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1852           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1853             {
1854               int load_place
1855                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1856               gcc_assert (load_place != -1);
1857               if (load_place != j)
1858                 this_load_permuted = true;
1859               load_permutation.safe_push (load_place);
1860             }
1861           if (!this_load_permuted)
1862             {
1863               load_permutation.release ();
1864               continue;
1865             }
1866           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1867           loads_permuted = true;
1868         }
1869
1870       if (loads_permuted)
1871         {
1872           if (!vect_supported_load_permutation_p (new_instance))
1873             {
1874               if (dump_enabled_p ())
1875                 {
1876                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1877                                    "Build SLP failed: unsupported load "
1878                                    "permutation ");
1879                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1880                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1881                 }
1882               vect_free_slp_instance (new_instance);
1883               return false;
1884             }
1885         }
1886
1887
1888       if (loop_vinfo)
1889         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1890       else
1891         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1892
1893       if (dump_enabled_p ())
1894         vect_print_slp_tree (MSG_NOTE, node);
1895
1896       return true;
1897     }
1898
1899   /* Failed to SLP.  */
1900   /* Free the allocated memory.  */
1901   vect_free_slp_tree (node);
1902   loads.release ();
1903
1904   return false;
1905 }
1906
1907
1908 /* Check if there are stmts in the loop that can be vectorized using SLP.  Build SLP
1909    trees of packed scalar stmts if SLP is possible.  */
1910
1911 bool
1912 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1913                   unsigned max_tree_size)
1914 {
1915   unsigned int i;
1916   vec<gimple> grouped_stores;
1917   vec<gimple> reductions = vNULL;
1918   vec<gimple> reduc_chains = vNULL;
1919   gimple first_element;
1920   bool ok = false;
1921
1922   if (dump_enabled_p ())
1923     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1924
1925   if (loop_vinfo)
1926     {
1927       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1928       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1929       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1930     }
1931   else
1932     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1933
1934   /* Find SLP sequences starting from groups of grouped stores.  */
1935   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1936     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1937                                    max_tree_size))
1938       ok = true;
1939
1940   if (reduc_chains.length () > 0)
1941     {
1942       /* Find SLP sequences starting from reduction chains.  */
1943       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1944         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1945                                        max_tree_size))
1946           ok = true;
1947         else
1948           return false;
1949
1950       /* Don't try to vectorize SLP reductions if reduction chain was
1951          detected.  */
1952       return ok;
1953     }
1954
1955   /* Find SLP sequences starting from groups of reductions.  */
1956   if (reductions.length () > 1
1957       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1958                                     max_tree_size))
1959     ok = true;
1960
1961   return true;
1962 }
1963
1964
1965 /* For each possible SLP instance decide whether to SLP it and calculate
1966    the overall unrolling factor needed to SLP the loop.  Return TRUE if
1967    decided to SLP at least one instance.  */
1968
1969 bool
1970 vect_make_slp_decision (loop_vec_info loop_vinfo)
1971 {
1972   unsigned int i, unrolling_factor = 1;
1973   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1974   slp_instance instance;
1975   int decided_to_slp = 0;
1976
1977   if (dump_enabled_p ())
1978     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1979                      "\n");
1980
1981   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1982     {
1983       /* FORNOW: SLP if you can.  */
1984       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1985         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1986
1987       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1988          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1989          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1990       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1991       decided_to_slp++;
1992     }
1993
1994   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1995
1996   if (decided_to_slp && dump_enabled_p ())
1997     dump_printf_loc (MSG_NOTE, vect_location,
1998                      "Decided to SLP %d instances. Unrolling factor %d\n",
1999                      decided_to_slp, unrolling_factor);
2000
2001   return (decided_to_slp > 0);
2002 }
2003
2004
2005 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2006    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
2007
2008 static void
2009 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2010 {
2011   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2012   imm_use_iterator imm_iter;
2013   gimple use_stmt;
2014   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2015   slp_tree child;
2016   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2017   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2018   int j;
2019
2020   /* Propagate hybrid down the SLP tree.  */
2021   if (stype == hybrid)
2022     ;
2023   else if (HYBRID_SLP_STMT (stmt_vinfo))
2024     stype = hybrid;
2025   else
2026     {
2027       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
2028       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2029       if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2030         FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
2031           if (gimple_bb (use_stmt)
2032               && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2033               && (use_vinfo = vinfo_for_stmt (use_stmt))
2034               && !STMT_SLP_TYPE (use_vinfo)
2035               && (STMT_VINFO_RELEVANT (use_vinfo)
2036                   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))
2037                   || (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2038                       && STMT_VINFO_RELATED_STMT (use_vinfo)
2039                       && !STMT_SLP_TYPE (vinfo_for_stmt
2040                             (STMT_VINFO_RELATED_STMT (use_vinfo)))))
2041               && !(gimple_code (use_stmt) == GIMPLE_PHI
2042                    && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2043             stype = hybrid;
2044     }
2045
2046   if (stype == hybrid)
2047     {
2048       if (dump_enabled_p ())
2049         {
2050           dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2051           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2052         }
2053       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2054     }
2055
2056   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2057     if (child)
2058       vect_detect_hybrid_slp_stmts (child, i, stype);
2059 }
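/* For illustration (hypothetical use): a pure-SLP stmt whose def is also
   used by a relevant stmt outside any SLP instance is re-marked hybrid by
   the walk above, so it is additionally vectorized by the loop-based
   path.  */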
2060
2061 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2062
2063 static tree
2064 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2065 {
2066   walk_stmt_info *wi = (walk_stmt_info *)data;
2067   struct loop *loopp = (struct loop *)wi->info;
2068
2069   if (wi->is_lhs)
2070     return NULL_TREE;
2071
2072   if (TREE_CODE (*tp) == SSA_NAME
2073       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2074     {
2075       gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
2076       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2077           && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2078         {
2079           if (dump_enabled_p ())
2080             {
2081               dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2082               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2083             }
2084           STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2085         }
2086     }
2087
2088   return NULL_TREE;
2089 }
2090
2091 static tree
2092 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2093                           walk_stmt_info *)
2094 {
2095   /* If the stmt is in a SLP instance then this isn't a reason
2096      to mark use definitions in other SLP instances as hybrid.  */
2097   if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2098     *handled = true;
2099   return NULL_TREE;
2100 }
2101
2102 /* Find stmts that must be both vectorized and SLPed.  */
2103
2104 void
2105 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2106 {
2107   unsigned int i;
2108   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2109   slp_instance instance;
2110
2111   if (dump_enabled_p ())
2112     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2113                      "\n");
2114
2115   /* First walk all pattern stmts in the loop and mark the defs of their
2116      uses as hybrid, because uses inside pattern stmts are not recorded.  */
2117   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2118     {
2119       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2120       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2121            gsi_next (&gsi))
2122         {
2123           gimple stmt = gsi_stmt (gsi);
2124           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2125           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2126             {
2127               walk_stmt_info wi;
2128               memset (&wi, 0, sizeof (wi));
2129               wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2130               gimple_stmt_iterator gsi2
2131                 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2132               walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2133                                 vect_detect_hybrid_slp_1, &wi);
2134               walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2135                                vect_detect_hybrid_slp_2,
2136                                vect_detect_hybrid_slp_1, &wi);
2137             }
2138         }
2139     }
2140
2141   /* Then walk the SLP instance trees marking stmts with uses in
2142      non-SLP stmts as hybrid, also propagating hybrid down the
2143      SLP tree, collecting the above info on-the-fly.  */
2144   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2145     {
2146       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2147         vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2148                                       i, pure_slp);
2149     }
2150 }
2151
2152
2153 /* Create and initialize a new bb_vec_info struct for BB, as well as
2154    stmt_vec_info structs for all the stmts in it.  */
2155
2156 static bb_vec_info
2157 new_bb_vec_info (basic_block bb)
2158 {
2159   bb_vec_info res = NULL;
2160   gimple_stmt_iterator gsi;
2161
2162   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2163   BB_VINFO_BB (res) = bb;
2164
2165   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2166     {
2167       gimple stmt = gsi_stmt (gsi);
2168       gimple_set_uid (stmt, 0);
2169       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
2170     }
2171
2172   BB_VINFO_GROUPED_STORES (res).create (10);
2173   BB_VINFO_SLP_INSTANCES (res).create (2);
2174   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2175
2176   bb->aux = res;
2177   return res;
2178 }
2179
2180
2181 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2182    stmts in the basic block.  */
2183
2184 static void
2185 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2186 {
2187   vec<slp_instance> slp_instances;
2188   slp_instance instance;
2189   basic_block bb;
2190   gimple_stmt_iterator si;
2191   unsigned i;
2192
2193   if (!bb_vinfo)
2194     return;
2195
2196   bb = BB_VINFO_BB (bb_vinfo);
2197
2198   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2199     {
2200       gimple stmt = gsi_stmt (si);
2201       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2202
2203       if (stmt_info)
2204         /* Free stmt_vec_info.  */
2205         free_stmt_vec_info (stmt);
2206     }
2207
2208   vect_destroy_datarefs (NULL, bb_vinfo);
2209   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2210   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2211   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2212   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2213     vect_free_slp_instance (instance);
2214   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2215   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2216   free (bb_vinfo);
2217   bb->aux = NULL;
2218 }
2219
2220
2221 /* Analyze statements contained in SLP tree node after recursively analyzing
2222    the subtree. Return TRUE if the operations are supported.  */
2223
2224 static bool
2225 vect_slp_analyze_node_operations (slp_tree node)
2226 {
2227   bool dummy;
2228   int i;
2229   gimple stmt;
2230   slp_tree child;
2231
2232   if (!node)
2233     return true;
2234
2235   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2236     if (!vect_slp_analyze_node_operations (child))
2237       return false;
2238
2239   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2240     {
2241       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2242       gcc_assert (stmt_info);
2243       gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2244
2245       if (!vect_analyze_stmt (stmt, &dummy, node))
2246         return false;
2247     }
2248
2249   return true;
2250 }
2251
2252
2253 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
2254    operations are supported. */
2255
2256 bool
2257 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2258 {
2259   slp_instance instance;
2260   int i;
2261
2262   if (dump_enabled_p ())
2263     dump_printf_loc (MSG_NOTE, vect_location,
2264                      "=== vect_slp_analyze_operations ===\n");
2265
2266   for (i = 0; slp_instances.iterate (i, &instance); )
2267     {
2268       if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2269         {
2270           dump_printf_loc (MSG_NOTE, vect_location,
2271                            "removing SLP instance operations starting from: ");
2272           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2273                             SLP_TREE_SCALAR_STMTS
2274                               (SLP_INSTANCE_TREE (instance))[0], 0);
2275           vect_free_slp_instance (instance);
2276           slp_instances.ordered_remove (i);
2277         }
2278       else
2279         {
2280           /* Compute the costs of the SLP instance.  */
2281           vect_analyze_slp_cost (instance, data);
2282           i++;
2283         }
2284     }
2285
2286   if (!slp_instances.length ())
2287     return false;
2288
2289   return true;
2290 }
2291
2292
2293 /* Compute the scalar cost of the SLP node NODE and its children
2294    and return it.  Do not account for defs that are marked in LIFE,
2295    and update LIFE according to the uses of NODE.  */
2296
2297 static unsigned
2298 vect_bb_slp_scalar_cost (basic_block bb,
2299                          slp_tree node, vec<bool, va_heap> *life)
2300 {
2301   unsigned scalar_cost = 0;
2302   unsigned i;
2303   gimple stmt;
2304   slp_tree child;
2305
2306   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2307     {
2308       unsigned stmt_cost;
2309       ssa_op_iter op_iter;
2310       def_operand_p def_p;
2311       stmt_vec_info stmt_info;
2312
2313       if ((*life)[i])
2314         continue;
2315
2316       /* If there is a non-vectorized use of the defs then the scalar
2317          stmt is kept live in which case we do not account it or any
2318          required defs in the SLP children in the scalar cost.  This
2319          way we make the vectorization more costly when compared to
2320          the scalar cost.  */
2321       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2322         {
2323           imm_use_iterator use_iter;
2324           gimple use_stmt;
2325           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2326             if (!is_gimple_debug (use_stmt)
2327                 && (gimple_code (use_stmt) == GIMPLE_PHI
2328                     || gimple_bb (use_stmt) != bb
2329                     || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2330               {
2331                 (*life)[i] = true;
2332                 BREAK_FROM_IMM_USE_STMT (use_iter);
2333               }
2334         }
2335       if ((*life)[i])
2336         continue;
2337
2338       stmt_info = vinfo_for_stmt (stmt);
2339       if (STMT_VINFO_DATA_REF (stmt_info))
2340         {
2341           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2342             stmt_cost = vect_get_stmt_cost (scalar_load);
2343           else
2344             stmt_cost = vect_get_stmt_cost (scalar_store);
2345         }
2346       else
2347         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2348
2349       scalar_cost += stmt_cost;
2350     }
2351
2352   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2353     if (child)
2354       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2355
2356   return scalar_cost;
2357 }
2358
2359 /* Check if vectorization of the basic block is profitable.  */
2360
2361 static bool
2362 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2363 {
2364   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2365   slp_instance instance;
2366   int i;
2367   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2368   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2369
2370   /* Calculate scalar cost.  */
2371   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2372     {
2373       auto_vec<bool, 20> life;
2374       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2375       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2376                                               SLP_INSTANCE_TREE (instance),
2377                                               &life);
2378     }
2379
2380   /* Complete the target-specific cost calculation.  */
2381   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2382                &vec_inside_cost, &vec_epilogue_cost);
2383
2384   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
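  /* E.g. (made-up numbers): scalar_cost 12, vec_inside_cost 6 and
     vec_outside_cost 4 give a vector cost of 10 < 12, so the block is
     considered profitable to vectorize below.  */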
2385
2386   if (dump_enabled_p ())
2387     {
2388       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2389       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2390                    vec_inside_cost);
2391       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2392       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2393       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2394     }
2395
2396   /* Vectorization is profitable if its cost is less than the cost of scalar
2397      version.  */
2398   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2399     return false;
2400
2401   return true;
2402 }
2403
2404 /* Check if the basic block can be vectorized.  */
2405
2406 static bb_vec_info
2407 vect_slp_analyze_bb_1 (basic_block bb)
2408 {
2409   bb_vec_info bb_vinfo;
2410   vec<slp_instance> slp_instances;
2411   slp_instance instance;
2412   int i;
2413   int min_vf = 2;
2414   unsigned n_stmts = 0;
2415
2416   bb_vinfo = new_bb_vec_info (bb);
2417   if (!bb_vinfo)
2418     return NULL;
2419
2420   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2421     {
2422       if (dump_enabled_p ())
2423         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2424                          "not vectorized: unhandled data-ref in basic "
2425                          "block.\n");
2426
2427       destroy_bb_vec_info (bb_vinfo);
2428       return NULL;
2429     }
2430
2431   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2432     {
2433       if (dump_enabled_p ())
2434         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2435                          "not vectorized: not enough data-refs in "
2436                          "basic block.\n");
2437
2438       destroy_bb_vec_info (bb_vinfo);
2439       return NULL;
2440     }
2441
2442   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2443     {
2444       if (dump_enabled_p ())
2445         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2446                          "not vectorized: unhandled data access in "
2447                          "basic block.\n");
2448
2449       destroy_bb_vec_info (bb_vinfo);
2450       return NULL;
2451     }
2452
2453   vect_pattern_recog (NULL, bb_vinfo);
2454
2455   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2456     {
2457       if (dump_enabled_p ())
2458         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459                          "not vectorized: bad data alignment in basic "
2460                          "block.\n");
2461
2462       destroy_bb_vec_info (bb_vinfo);
2463       return NULL;
2464     }
2465
2466   /* Check the SLP opportunities in the basic block, analyze and build SLP
2467      trees.  */
2468   if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2469     {
2470       if (dump_enabled_p ())
2471         {
2472           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2473                            "Failed to SLP the basic block.\n");
2474           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2475                            "not vectorized: failed to find SLP opportunities "
2476                            "in basic block.\n");
2477         }
2478
2479       destroy_bb_vec_info (bb_vinfo);
2480       return NULL;
2481     }
2482
2483   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2484
2485   /* Mark all the statements that we want to vectorize as pure SLP and
2486      relevant.  */
2487   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2488     {
2489       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2490       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2491     }
2492
2493   /* Mark all the statements that we do not want to vectorize.  */
2494   for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2495        !gsi_end_p (gsi); gsi_next (&gsi))
2496     {
2497       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2498       if (STMT_SLP_TYPE (vinfo) != pure_slp)
2499         STMT_VINFO_VECTORIZABLE (vinfo) = false;
2500     }
2501
2502   /* Analyze dependences.  At this point all stmts not participating in
2503      vectorization have to be marked.  Dependence analysis assumes
2504      that we either vectorize all SLP instances or none at all.  */
2505   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2506     {
2507       if (dump_enabled_p ())
2508         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2509                          "not vectorized: unhandled data dependence "
2510                          "in basic block.\n");
2511
2512       destroy_bb_vec_info (bb_vinfo);
2513       return NULL;
2514     }
2515
2516   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2517     {
2518       if (dump_enabled_p ())
2519         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2520                          "not vectorized: unsupported alignment in basic "
2521                          "block.\n");
2522       destroy_bb_vec_info (bb_vinfo);
2523       return NULL;
2524     }
2525
2526   if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2527                                     BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2528     {
2529       if (dump_enabled_p ())
2530         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2531                          "not vectorized: bad operation in basic block.\n");
2532
2533       destroy_bb_vec_info (bb_vinfo);
2534       return NULL;
2535     }
2536
2537   /* Cost model: check if the vectorization is worthwhile.  */
2538   if (!unlimited_cost_model (NULL)
2539       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2540     {
2541       if (dump_enabled_p ())
2542         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2543                          "not vectorized: vectorization is not "
2544                          "profitable.\n");
2545
2546       destroy_bb_vec_info (bb_vinfo);
2547       return NULL;
2548     }
2549
2550   if (dump_enabled_p ())
2551     dump_printf_loc (MSG_NOTE, vect_location,
2552                      "Basic block will be vectorized using SLP\n");
2553
2554   return bb_vinfo;
2555 }
2556
2557
2558 bb_vec_info
2559 vect_slp_analyze_bb (basic_block bb)
2560 {
2561   bb_vec_info bb_vinfo;
2562   int insns = 0;
2563   gimple_stmt_iterator gsi;
2564   unsigned int vector_sizes;
2565
2566   if (dump_enabled_p ())
2567     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2568
2569   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2570     {
2571       gimple stmt = gsi_stmt (gsi);
2572       if (!is_gimple_debug (stmt)
2573           && !gimple_nop_p (stmt)
2574           && gimple_code (stmt) != GIMPLE_LABEL)
2575         insns++;
2576     }
2577
2578   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2579     {
2580       if (dump_enabled_p ())
2581         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2582                          "not vectorized: too many instructions in "
2583                          "basic block.\n");
2584
2585       return NULL;
2586     }
2587
2588   /* Autodetect first vector size we try.  */
2589   current_vector_size = 0;
2590   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2591
2592   while (1)
2593     {
2594       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2595       if (bb_vinfo)
2596         return bb_vinfo;
2597
2598       destroy_bb_vec_info (bb_vinfo);
2599
2600       vector_sizes &= ~current_vector_size;
2601       if (vector_sizes == 0
2602           || current_vector_size == 0)
2603         return NULL;
2604
2605       /* Try the next biggest vector size.  */
2606       current_vector_size = 1 << floor_log2 (vector_sizes);
2607       if (dump_enabled_p ())
2608         dump_printf_loc (MSG_NOTE, vect_location,
2609                          "***** Re-trying analysis with "
2610                          "vector size %d\n", current_vector_size);
2611     }
2612 }
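/* For illustration (hypothetical target): if 32-byte and 16-byte vectors
   are advertised and analysis with the autodetected 32-byte size fails,
   the loop above retries with the 16-byte size before giving up.  */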
2613
2614
2615 /* For constant and loop invariant defs of SLP_NODE this function returns
2616    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2617    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2618    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2619    REDUC_INDEX is the index of the reduction operand in the statements, unless
2620    it is -1.  */
2621
2622 static void
2623 vect_get_constant_vectors (tree op, slp_tree slp_node,
2624                            vec<tree> *vec_oprnds,
2625                            unsigned int op_num, unsigned int number_of_vectors,
2626                            int reduc_index)
2627 {
2628   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2629   gimple stmt = stmts[0];
2630   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2631   unsigned nunits;
2632   tree vec_cst;
2633   tree *elts;
2634   unsigned j, number_of_places_left_in_vector;
2635   tree vector_type;
2636   tree vop;
2637   int group_size = stmts.length ();
2638   unsigned int vec_num, i;
2639   unsigned number_of_copies = 1;
2640   vec<tree> voprnds;
2641   voprnds.create (number_of_vectors);
2642   bool constant_p, is_store;
2643   tree neutral_op = NULL;
2644   enum tree_code code = gimple_expr_code (stmt);
2645   gimple def_stmt;
2646   struct loop *loop;
2647   gimple_seq ctor_seq = NULL;
2648
2649   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2650   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2651
2652   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2653       && reduc_index != -1)
2654     {
2655       op_num = reduc_index;
2656       op = gimple_op (stmt, op_num + 1);
2657       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2658          we need either neutral operands or the original operands.  See
2659          get_initial_def_for_reduction() for details.  */
2660       switch (code)
2661         {
2662           case WIDEN_SUM_EXPR:
2663           case DOT_PROD_EXPR:
2664           case SAD_EXPR:
2665           case PLUS_EXPR:
2666           case MINUS_EXPR:
2667           case BIT_IOR_EXPR:
2668           case BIT_XOR_EXPR:
2669              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2670                neutral_op = build_real (TREE_TYPE (op), dconst0);
2671              else
2672                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2673
2674              break;
2675
2676           case MULT_EXPR:
2677              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2678                neutral_op = build_real (TREE_TYPE (op), dconst1);
2679              else
2680                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2681
2682              break;
2683
2684           case BIT_AND_EXPR:
2685             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2686             break;
2687
2688           /* For MIN/MAX we don't have an easy neutral operand but
2689              the initial values can be used fine here.  Only for
2690              a reduction chain we have to force a neutral element.  */
2691           case MAX_EXPR:
2692           case MIN_EXPR:
2693             if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2694               neutral_op = NULL;
2695             else
2696               {
2697                 def_stmt = SSA_NAME_DEF_STMT (op);
2698                 loop = (gimple_bb (stmt))->loop_father;
2699                 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2700                                                     loop_preheader_edge (loop));
2701               }
2702             break;
2703
2704           default:
2705             gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2706             neutral_op = NULL;
2707         }
2708     }
2709
2710   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2711     {
2712       is_store = true;
2713       op = gimple_assign_rhs1 (stmt);
2714     }
2715   else
2716     is_store = false;
2717
2718   gcc_assert (op);
2719
2720   if (CONSTANT_CLASS_P (op))
2721     constant_p = true;
2722   else
2723     constant_p = false;
2724
2725   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2726      created vectors. It is greater than 1 if unrolling is performed.
2727
2728      For example, we have two scalar operands, s1 and s2 (e.g., group of
2729      strided accesses of size two), while NUNITS is four (i.e., four scalars
2730      of this type can be packed in a vector).  The output vector will contain
2731      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2732      will be 2).
2733
2734      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2735      containing the operands.
2736
2737      For example, NUNITS is four as before, and the group size is 8
2738      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2739      {s5, s6, s7, s8}.  */
2740
2741   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2742
2743   number_of_places_left_in_vector = nunits;
2744   elts = XALLOCAVEC (tree, nunits);
2745   bool place_after_defs = false;
2746   for (j = 0; j < number_of_copies; j++)
2747     {
2748       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2749         {
2750           if (is_store)
2751             op = gimple_assign_rhs1 (stmt);
2752           else
2753             {
2754               switch (code)
2755                 {
2756                   case COND_EXPR:
2757                     if (op_num == 0 || op_num == 1)
2758                       {
2759                         tree cond = gimple_assign_rhs1 (stmt);
2760                         op = TREE_OPERAND (cond, op_num);
2761                       }
2762                     else
2763                       {
2764                         if (op_num == 2)
2765                           op = gimple_assign_rhs2 (stmt);
2766                         else
2767                           op = gimple_assign_rhs3 (stmt);
2768                       }
2769                     break;
2770
2771                   case CALL_EXPR:
2772                     op = gimple_call_arg (stmt, op_num);
2773                     break;
2774
2775                   case LSHIFT_EXPR:
2776                   case RSHIFT_EXPR:
2777                   case LROTATE_EXPR:
2778                   case RROTATE_EXPR:
2779                     op = gimple_op (stmt, op_num + 1);
2780                     /* Unlike the other binary operators, shifts/rotates
2781                        take a shift count of type int rather than of the
2782                        same type as the lhs, so make sure the scalar is the
2783                        right type if we are dealing with vectors of
2784                        long long/long/short/char.  */
2785                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2786                       op = fold_convert (TREE_TYPE (vector_type), op);
2787                     break;
2788
2789                   default:
2790                     op = gimple_op (stmt, op_num + 1);
2791                     break;
2792                 }
2793             }
2794
2795           if (reduc_index != -1)
2796             {
2797               loop = (gimple_bb (stmt))->loop_father;
2798               def_stmt = SSA_NAME_DEF_STMT (op);
2799
2800               gcc_assert (loop);
2801
2802               /* Get the def before the loop.  In reduction chain we have only
2803                  one initial value.  */
2804               if ((j != (number_of_copies - 1)
2805                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2806                        && i != 0))
2807                   && neutral_op)
2808                 op = neutral_op;
2809               else
2810                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2811                                             loop_preheader_edge (loop));
2812             }
2813
2814           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2815           number_of_places_left_in_vector--;
2816           tree orig_op = op;
2817           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2818             {
2819               if (CONSTANT_CLASS_P (op))
2820                 {
2821                   op = fold_unary (VIEW_CONVERT_EXPR,
2822                                    TREE_TYPE (vector_type), op);
2823                   gcc_assert (op && CONSTANT_CLASS_P (op));
2824                 }
2825               else
2826                 {
2827                   tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
2828                   gimple init_stmt;
2829                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
2830                   init_stmt
2831                     = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
2832                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2833                   op = new_temp;
2834                 }
2835             }
2836           elts[number_of_places_left_in_vector] = op;
2837           if (!CONSTANT_CLASS_P (op))
2838             constant_p = false;
2839           if (TREE_CODE (orig_op) == SSA_NAME
2840               && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2841               && STMT_VINFO_BB_VINFO (stmt_vinfo)
2842               && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2843                   == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2844             place_after_defs = true;
2845
2846           if (number_of_places_left_in_vector == 0)
2847             {
2848               number_of_places_left_in_vector = nunits;
2849
2850               if (constant_p)
2851                 vec_cst = build_vector (vector_type, elts);
2852               else
2853                 {
2854                   vec<constructor_elt, va_gc> *v;
2855                   unsigned k;
2856                   vec_alloc (v, nunits);
2857                   for (k = 0; k < nunits; ++k)
2858                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2859                   vec_cst = build_constructor (vector_type, v);
2860                 }
2861               tree init;
2862               gimple_stmt_iterator gsi;
2863               if (place_after_defs)
2864                 {
2865                   gsi = gsi_for_stmt
2866                           (vect_find_last_scalar_stmt_in_slp (slp_node));
2867                   init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2868                 }
2869               else
2870                 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
2871               if (ctor_seq != NULL)
2872                 {
2873                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
2874                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2875                                                         GSI_SAME_STMT);
2876                   ctor_seq = NULL;
2877                 }
2878               voprnds.quick_push (init);
2879               place_after_defs = false;
2880             }
2881         }
2882     }
2883
2884   /* Since the vectors were created in reverse order, reverse them back
2885      into the original order.  */
2886   vec_num = voprnds.length ();
2887   for (j = vec_num; j != 0; j--)
2888     {
2889       vop = voprnds[j - 1];
2890       vec_oprnds->quick_push (vop);
2891     }
2892
2893   voprnds.release ();
2894
2895   /* If VF is greater than the unrolling factor needed for the SLP group
2896      of stmts, the NUMBER_OF_VECTORS to be created is greater than
2897      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we
2898      have to replicate the vectors.  */
2899   while (number_of_vectors > vec_oprnds->length ())
2900     {
2901       tree neutral_vec = NULL;
2902
2903       if (neutral_op)
2904         {
2905           if (!neutral_vec)
2906             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2907
2908           vec_oprnds->quick_push (neutral_vec);
2909         }
2910       else
2911         {
2912           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2913             vec_oprnds->quick_push (vop);
2914         }
2915     }
2916 }
2917
2918
2919 /* Get vectorized definitions from SLP_NODE that contains corresponding
2920    vectorized def-stmts.  */
2921
2922 static void
2923 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2924 {
2925   tree vec_oprnd;
2926   gimple vec_def_stmt;
2927   unsigned int i;
2928
2929   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2930
2931   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2932     {
2933       gcc_assert (vec_def_stmt);
2934       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2935       vec_oprnds->quick_push (vec_oprnd);
2936     }
2937 }
2938
2939
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */
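/* As an illustration (hypothetical stmts, not taken from this file): for an
   SLP node of stmts of the form  a[i] = b[i] + x  with loop-invariant X,
   the vectorized defs of B are fetched from the corresponding child node by
   vect_get_slp_vect_defs (), while the vectors holding X are built from
   scratch by vect_get_constant_vectors ().  */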
2946
2947 void
2948 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2949                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2950 {
2951   gimple first_stmt;
2952   int number_of_vects = 0, i;
2953   unsigned int child_index = 0;
2954   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2955   slp_tree child = NULL;
2956   vec<tree> vec_defs;
2957   tree oprnd;
2958   bool vectorized_defs;
2959
2960   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2961   FOR_EACH_VEC_ELT (ops, i, oprnd)
2962     {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
         this child node is checked again for the next operand.  */
2969       vectorized_defs = false;
2970       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2971         {
2972           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2973
2974           /* We have to check both pattern and original def, if available.  */
2975           if (child)
2976             {
2977               gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2978               gimple related
2979                 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2980
2981               if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2982                   || (related
2983                       && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2984                 {
2985                   /* The number of vector defs is determined by the number of
2986                      vector statements in the node from which we get those
2987                      statements.  */
2988                   number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2989                   vectorized_defs = true;
2990                   child_index++;
2991                 }
2992             }
2993           else
2994             child_index++;
2995         }
2996
2997       if (!vectorized_defs)
2998         {
2999           if (i == 0)
3000             {
3001               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3002               /* Number of vector stmts was calculated according to LHS in
3003                  vect_schedule_slp_instance (), fix it by replacing LHS with
3004                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
3005                  details.  */
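              /* Illustrative numbers: for an int LHS (LHS_SIZE_UNIT == 4)
                 computed from short operands (RHS_SIZE_UNIT == 2),
                 NUMBER_OF_VECTS is halved, since twice as many shorts as
                 ints fit in one vector.  */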
3006               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3007                                              &rhs_size_unit);
3008               if (rhs_size_unit != lhs_size_unit)
3009                 {
3010                   number_of_vects *= rhs_size_unit;
3011                   number_of_vects /= lhs_size_unit;
3012                 }
3013             }
3014         }
3015
3016       /* Allocate memory for vectorized defs.  */
3017       vec_defs = vNULL;
3018       vec_defs.create (number_of_vects);
3019
3020       /* For reduction defs we call vect_get_constant_vectors (), since we are
3021          looking for initial loop invariant values.  */
3022       if (vectorized_defs && reduc_index == -1)
3023         /* The defs are already vectorized.  */
3024         vect_get_slp_vect_defs (child, &vec_defs);
3025       else
3026         /* Build vectors from scalar defs.  */
3027         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3028                                    number_of_vects, reduc_index);
3029
3030       vec_oprnds->quick_push (vec_defs);
3031
3032       /* For reductions, we only need initial values.  */
3033       if (reduc_index != -1)
3034         return;
3035     }
3036 }
3037
3038
/* Create NCOPIES permutation statements using the vector mask MASK and two
   input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
   for the first copy, shifting both indices by STRIDE elements of DR_CHAIN
   for every further copy.  (STRIDE is the number of vectorized stmts for
   NODE divided by the number of copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   at which the created stmts must be stored.  */
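/* For example (illustrative values): with NCOPIES == 2 and four vector stmts
   in NODE, STRIDE is 2, the two generated permute stmts are stored at slots
   VECT_STMTS_COUNTER and VECT_STMTS_COUNTER + 2, and the input indices into
   DR_CHAIN advance by STRIDE for the second copy.  */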
3047
3048 static inline void
3049 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
3050                            tree mask, int first_vec_indx, int second_vec_indx,
3051                            gimple_stmt_iterator *gsi, slp_tree node,
3052                            tree vectype, vec<tree> dr_chain,
3053                            int ncopies, int vect_stmts_counter)
3054 {
3055   tree perm_dest;
3056   gimple perm_stmt = NULL;
3057   stmt_vec_info next_stmt_info;
3058   int i, stride;
3059   tree first_vec, second_vec, data_ref;
3060
3061   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3062
3063   /* Initialize the vect stmts of NODE to properly insert the generated
3064      stmts later.  */
3065   for (i = SLP_TREE_VEC_STMTS (node).length ();
3066        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3067     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3068
3069   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3070   for (i = 0; i < ncopies; i++)
3071     {
3072       first_vec = dr_chain[first_vec_indx];
3073       second_vec = dr_chain[second_vec_indx];
3074
3075       /* Generate the permute statement.  */
3076       perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3077                                        first_vec, second_vec, mask);
3078       data_ref = make_ssa_name (perm_dest, perm_stmt);
3079       gimple_set_lhs (perm_stmt, data_ref);
3080       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3081
3082       /* Store the vector statement in NODE.  */
3083       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
3084
3085       first_vec_indx += stride;
3086       second_vec_indx += stride;
3087     }
3088
3089   /* Mark the scalar stmt as vectorized.  */
3090   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
3091   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
3092 }
3093
3094
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
   the next vector, i.e., the current first vector is not needed.  */
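/* A worked example (illustrative values): for MASK_NUNITS == 4 and
   FIRST_MASK_ELEMENT + M == 9, the element indexes past the two input
   vectors (9 >= 2 * 4).  Provided no earlier element of the mask needed the
   first vector, the first vector is dropped: 4 is subtracted from the mask
   elements collected so far and CURRENT_MASK_ELEMENT becomes 9 - 4 == 5,
   i.e. element 1 of the new second vector, with NEED_NEXT_VECTOR set.  */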
3100
3101 static bool
3102 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
3103                        int mask_nunits, bool only_one_vec, int index,
3104                        unsigned char *mask, int *current_mask_element,
3105                        bool *need_next_vector, int *number_of_mask_fixes,
3106                        bool *mask_fixed, bool *needs_first_vector)
3107 {
3108   int i;
3109
3110   /* Convert to target specific representation.  */
3111   *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for the second and third
     vectors.  */
3113   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
3114
3115   if (*current_mask_element < 0)
3116     {
3117       if (dump_enabled_p ())
3118         {
3119           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3120                            "permutation requires past vector ");
3121           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3122           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3123         }
3124       return false;
3125     }
3126
3127   if (*current_mask_element < mask_nunits)
3128     *needs_first_vector = true;
3129
3130   /* We have only one input vector to permute but the mask accesses values in
3131      the next vector as well.  */
3132   if (only_one_vec && *current_mask_element >= mask_nunits)
3133     {
3134       if (dump_enabled_p ())
3135         {
3136           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3137                            "permutation requires at least two vectors ");
3138           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3139           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3140         }
3141
3142       return false;
3143     }
3144
3145   /* The mask requires the next vector.  */
3146   while (*current_mask_element >= mask_nunits * 2)
3147     {
3148       if (*needs_first_vector || *mask_fixed)
3149         {
3150           /* We either need the first vector too or have already moved to the
3151              next vector. In both cases, this permutation needs three
3152              vectors.  */
3153           if (dump_enabled_p ())
3154             {
3155               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3156                                "permutation requires at "
3157                                "least three vectors ");
3158               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3159               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3160             }
3161
3162           return false;
3163         }
3164
3165       /* We move to the next vector, dropping the first one and working with
3166          the second and the third - we need to adjust the values of the mask
3167          accordingly.  */
3168       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
3169
3170       for (i = 0; i < index; i++)
3171         mask[i] -= mask_nunits * *number_of_mask_fixes;
3172
3173       (*number_of_mask_fixes)++;
3174       *mask_fixed = true;
3175     }
3176
3177   *need_next_vector = *mask_fixed;
3178
3179   /* This was the last element of this mask. Start a new one.  */
3180   if (index == mask_nunits - 1)
3181     {
3182       *number_of_mask_fixes = 1;
3183       *mask_fixed = false;
3184       *needs_first_vector = false;
3185     }
3186
3187   return true;
3188 }
3189
3190
3191 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3192    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3193    permute statements for the SLP node NODE of the SLP instance
3194    SLP_NODE_INSTANCE.  */
3195
3196 bool
3197 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3198                               gimple_stmt_iterator *gsi, int vf,
3199                               slp_instance slp_node_instance, bool analyze_only)
3200 {
3201   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3202   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3203   tree mask_element_type = NULL_TREE, mask_type;
3204   int i, j, k, nunits, vec_index = 0, scalar_index;
3205   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3206   gimple next_scalar_stmt;
3207   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3208   int first_mask_element;
3209   int index, unroll_factor, current_mask_element, ncopies;
3210   unsigned char *mask;
3211   bool only_one_vec = false, need_next_vector = false;
3212   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
3213   int number_of_mask_fixes = 1;
3214   bool mask_fixed = false;
3215   bool needs_first_vector = false;
3216   machine_mode mode;
3217
3218   mode = TYPE_MODE (vectype);
3219
3220   if (!can_vec_perm_p (mode, false, NULL))
3221     {
3222       if (dump_enabled_p ())
3223         {
3224           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3225                            "no vect permute for ");
3226           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3227           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3228         }
3229       return false;
3230     }
3231
3232   /* The generic VEC_PERM_EXPR code always uses an integral type of the
3233      same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3236   mask_type = get_vectype_for_scalar_type (mask_element_type);
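  /* E.g., for a vector of four floats this yields a vector of four 32-bit
     integers, since each mask element must be as wide as the elements it
     permutes.  */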
3237   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3238   mask = XALLOCAVEC (unsigned char, nunits);
3239   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3240
  /* The number of vector stmts to generate based only on the unrolling
     factor of SLP_NODE_INSTANCE.  */
  orig_vec_stmts_num
    = group_size * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
3245   if (orig_vec_stmts_num == 1)
3246     only_one_vec = true;
3247
3248   /* Number of copies is determined by the final vectorization factor
3249      relatively to SLP_NODE_INSTANCE unrolling factor.  */
3250   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
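  /* Illustrative numbers: GROUP_SIZE == 3, NUNITS == 4 and an SLP unrolling
     factor of 4 give ORIG_VEC_STMTS_NUM == 3 * 4 / 4 == 3, and with VF == 8
     the permutation sequence is generated NCOPIES == 8 / 4 == 2 times.  */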
3251
3252   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3253     return false;
3254
  /* Generate permutation masks for every NODE.  The number of masks for
     each NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for the permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
3272
  scalar_index = 0;
  index = 0;
  vect_stmts_counter = 0;
  vec_index = 0;
  first_vec_index = vec_index++;
  if (only_one_vec)
    second_vec_index = first_vec_index;
  else
    second_vec_index = vec_index++;

  for (j = 0; j < unroll_factor; j++)
    {
      for (k = 0; k < group_size; k++)
        {
          i = SLP_TREE_LOAD_PERMUTATION (node)[k];
          first_mask_element = i + j * group_size;
          if (!vect_get_mask_element (stmt, first_mask_element, 0,
                                      nunits, only_one_vec, index,
                                      mask, &current_mask_element,
                                      &need_next_vector,
                                      &number_of_mask_fixes, &mask_fixed,
                                      &needs_first_vector))
            return false;
          gcc_assert (current_mask_element >= 0
                      && current_mask_element < 2 * nunits);
          mask[index++] = current_mask_element;

          if (index == nunits)
            {
              index = 0;
              if (!can_vec_perm_p (mode, false, mask))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
                                     mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  return false;
                }

              if (!analyze_only)
                {
                  int l;
                  tree mask_vec, *mask_elts;
                  mask_elts = XALLOCAVEC (tree, nunits);
                  for (l = 0; l < nunits; ++l)
                    mask_elts[l] = build_int_cst (mask_element_type,
                                                  mask[l]);
                  mask_vec = build_vector (mask_type, mask_elts);

                  if (need_next_vector)
                    {
                      first_vec_index = second_vec_index;
                      second_vec_index = vec_index;
                    }

                  next_scalar_stmt
                    = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];

                  vect_create_mask_and_perm (stmt, next_scalar_stmt,
                                             mask_vec, first_vec_index,
                                             second_vec_index, gsi, node,
                                             vectype, dr_chain, ncopies,
                                             vect_stmts_counter++);
                }
            }
        }
    }
3347
3348   return true;
3349 }
3350
3351
3352
3353 /* Vectorize SLP instance tree in postorder.  */
3354
3355 static bool
3356 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3357                             unsigned int vectorization_factor)
3358 {
3359   gimple stmt;
3360   bool grouped_store, is_store;
3361   gimple_stmt_iterator si;
3362   stmt_vec_info stmt_info;
3363   unsigned int vec_stmts_size, nunits, group_size;
3364   tree vectype;
3365   int i;
3366   slp_tree child;
3367
3368   if (!node)
3369     return false;
3370
3371   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3372     vect_schedule_slp_instance (child, instance, vectorization_factor);
3373
3374   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3375   stmt_info = vinfo_for_stmt (stmt);
3376
3377   /* VECTYPE is the type of the destination.  */
3378   vectype = STMT_VINFO_VECTYPE (stmt_info);
3379   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3380   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3381
  /* For each SLP instance calculate the number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  The number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF and divided by the
     vector size.  */
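  /* E.g., GROUP_SIZE == 2, VECTORIZATION_FACTOR == 4 and NUNITS == 4 give
     VEC_STMTS_SIZE == (4 * 2) / 4 == 2 vector stmts for this node.  */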
3387   vec_stmts_size = (vectorization_factor * group_size) / nunits;
3388
3389   if (!SLP_TREE_VEC_STMTS (node).exists ())
3390     {
3391       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3392       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3393     }
3394
3395   if (dump_enabled_p ())
3396     {
      dump_printf_loc (MSG_NOTE, vect_location,
3398                        "------>vectorizing SLP node starting from: ");
3399       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3400       dump_printf (MSG_NOTE, "\n");
3401     }
3402
  /* Vectorized stmts go before the last scalar stmt, which is where
     all uses are ready.  */
3405   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3406
3407   /* Mark the first element of the reduction chain as reduction to properly
3408      transform the node.  In the analysis phase only the last element of the
3409      chain is marked as reduction.  */
3410   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3411       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3412     {
3413       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3414       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3415     }
3416
3417   /* Handle two-operation SLP nodes by vectorizing the group with
3418      both operations and then performing a merge.  */
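  /* An illustrative sketch: for the group { a[0] + b[0], a[1] - b[1] } the
     node is vectorized once with PLUS_EXPR and once with MINUS_EXPR, and a
     VEC_PERM_EXPR whose mask selects even elements from the first result
     and odd elements from the second merges them (cf. the addsub note
     further below).  */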
3419   if (SLP_TREE_TWO_OPERATORS (node))
3420     {
3421       enum tree_code code0 = gimple_assign_rhs_code (stmt);
      /* Initialized only to silence a maybe-uninitialized warning;
         OCODE is always set before use when !ALLSAME.  */
      enum tree_code ocode = ERROR_MARK;
3423       gimple ostmt;
3424       unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3425       bool allsame = true;
3426       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3427         if (gimple_assign_rhs_code (ostmt) != code0)
3428           {
3429             mask[i] = 1;
3430             allsame = false;
3431             ocode = gimple_assign_rhs_code (ostmt);
3432           }
3433         else
3434           mask[i] = 0;
3435       if (!allsame)
3436         {
3437           vec<gimple> v0;
3438           vec<gimple> v1;
3439           unsigned j;
3440           tree tmask = NULL_TREE;
3441           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3442           v0 = SLP_TREE_VEC_STMTS (node).copy ();
3443           SLP_TREE_VEC_STMTS (node).truncate (0);
3444           gimple_assign_set_rhs_code (stmt, ocode);
3445           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3446           gimple_assign_set_rhs_code (stmt, code0);
3447           v1 = SLP_TREE_VEC_STMTS (node).copy ();
3448           SLP_TREE_VEC_STMTS (node).truncate (0);
3449           tree meltype = build_nonstandard_integer_type
3450               (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3451           tree mvectype = get_same_sized_vectype (meltype, vectype);
3452           unsigned k = 0, l;
3453           for (j = 0; j < v0.length (); ++j)
3454             {
3455               tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3456               for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3457                 {
3458                   if (k >= group_size)
3459                     k = 0;
3460                   melts[l] = build_int_cst
3461                       (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3462                 }
3463               tmask = build_vector (mvectype, melts);
3464
3465               /* ???  Not all targets support a VEC_PERM_EXPR with a
3466                  constant mask that would translate to a vec_merge RTX
3467                  (with their vec_perm_const_ok).  We can either not
3468                  vectorize in that case or let veclower do its job.
3469                  Unfortunately that isn't too great and at least for
3470                  plus/minus we'd eventually like to match targets
3471                  vector addsub instructions.  */
3472               gimple vstmt;
3473               vstmt = gimple_build_assign (make_ssa_name (vectype),
3474                                            VEC_PERM_EXPR,
3475                                            gimple_assign_lhs (v0[j]),
3476                                            gimple_assign_lhs (v1[j]), tmask);
3477               vect_finish_stmt_generation (stmt, vstmt, &si);
3478               SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3479             }
3480           v0.release ();
3481           v1.release ();
3482           return false;
3483         }
3484     }
3485   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3486   return is_store;
3487 }
3488
/* Replace the scalar calls in SLP node NODE with assignments setting their
   lhs to zero.  For loop vectorization this is done in vectorizable_call,
   but for SLP it needs to be deferred until the end of vect_schedule_slp,
   because multiple SLP instances may refer to the same scalar stmt.  */
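/* For instance (hypothetical stmt): a vectorized call  x_1 = foo (a_2);
   is rewritten here to  x_1 = 0;  (a zero of the lhs type), keeping a
   valid scalar def for every SLP instance that shares the stmt until the
   dead scalar code is removed.  */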
3493
3494 static void
3495 vect_remove_slp_scalar_calls (slp_tree node)
3496 {
3497   gimple stmt, new_stmt;
3498   gimple_stmt_iterator gsi;
3499   int i;
3500   slp_tree child;
3501   tree lhs;
3502   stmt_vec_info stmt_info;
3503
3504   if (!node)
3505     return;
3506
3507   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3508     vect_remove_slp_scalar_calls (child);
3509
3510   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3511     {
3512       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3513         continue;
3514       stmt_info = vinfo_for_stmt (stmt);
3515       if (stmt_info == NULL
3516           || is_pattern_stmt_p (stmt_info)
3517           || !PURE_SLP_STMT (stmt_info))
3518         continue;
3519       lhs = gimple_call_lhs (stmt);
3520       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3521       set_vinfo_for_stmt (new_stmt, stmt_info);
3522       set_vinfo_for_stmt (stmt, NULL);
3523       STMT_VINFO_STMT (stmt_info) = new_stmt;
3524       gsi = gsi_for_stmt (stmt);
3525       gsi_replace (&gsi, new_stmt, false);
3526       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3527     }
3528 }
3529
3530 /* Generate vector code for all SLP instances in the loop/basic block.  */
3531
3532 bool
3533 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3534 {
3535   vec<slp_instance> slp_instances;
3536   slp_instance instance;
3537   unsigned int i, vf;
3538   bool is_store = false;
3539
3540   if (loop_vinfo)
3541     {
3542       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3543       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3544     }
3545   else
3546     {
3547       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3548       vf = 1;
3549     }
3550
3551   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3552     {
3553       /* Schedule the tree of INSTANCE.  */
3554       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3555                                              instance, vf);
3556       if (dump_enabled_p ())
3557         dump_printf_loc (MSG_NOTE, vect_location,
3558                          "vectorizing stmts using SLP.\n");
3559     }
3560
3561   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3562     {
3563       slp_tree root = SLP_INSTANCE_TREE (instance);
3564       gimple store;
3565       unsigned int j;
3566       gimple_stmt_iterator gsi;
3567
3568       /* Remove scalar call stmts.  Do not do this for basic-block
3569          vectorization as not all uses may be vectorized.
3570          ???  Why should this be necessary?  DCE should be able to
3571          remove the stmts itself.
3572          ???  For BB vectorization we can as well remove scalar
3573          stmts starting from the SLP tree root if they have no
3574          uses.  */
3575       if (loop_vinfo)
3576         vect_remove_slp_scalar_calls (root);
3577
3578       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3579                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3580         {
3581           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3582             break;
3583
          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3586           /* Free the attached stmt_vec_info and remove the stmt.  */
3587           gsi = gsi_for_stmt (store);
3588           unlink_stmt_vdef (store);
3589           gsi_remove (&gsi, true);
3590           release_defs (store);
3591           free_stmt_vec_info (store);
3592         }
3593     }
3594
3595   return is_store;
3596 }
3597
3598
3599 /* Vectorize the basic block.  */
3600
3601 void
3602 vect_slp_transform_bb (basic_block bb)
3603 {
3604   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3605   gimple_stmt_iterator si;
3606
3607   gcc_assert (bb_vinfo);
3608
3609   if (dump_enabled_p ())
3610     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3611
3612   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3613     {
3614       gimple stmt = gsi_stmt (si);
3615       stmt_vec_info stmt_info;
3616
3617       if (dump_enabled_p ())
3618         {
3619           dump_printf_loc (MSG_NOTE, vect_location,
3620                            "------>SLPing statement: ");
3621           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3622           dump_printf (MSG_NOTE, "\n");
3623         }
3624
3625       stmt_info = vinfo_for_stmt (stmt);
3626       gcc_assert (stmt_info);
3627
3628       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3629       if (STMT_SLP_TYPE (stmt_info))
3630         {
3631           vect_schedule_slp (NULL, bb_vinfo);
3632           break;
3633         }
3634     }
3635
3636   if (dump_enabled_p ())
3637     dump_printf_loc (MSG_NOTE, vect_location,
3638                      "BASIC BLOCK VECTORIZED\n");
3639
3640   destroy_bb_vec_info (bb_vinfo);
3641 }