/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if one is found and UNKNOWN_LOCATION
   if not.  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;

  return node;
}
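
/* As an illustration of the NOPS computation above (a made-up example,
   not taken from a testcase): for the GIMPLE assignment

     c_1 = a_2 + b_3;

   gimple_num_ops is 3 (the lhs plus two rhs operands), so the node is
   created with room for nops == 2 children.  A COND_EXPR right-hand
   side such as

     x_1 = p_2 < q_3 ? a_4 : b_5;

   gets one extra slot because the embedded comparison contributes two
   operands of its own (p_2, q_3, a_4 and b_5, four in total).  */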


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
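
/* For example (a made-up chain, assuming the usual GROUP_GAP bookkeeping
   in which adjacent elements have gap 1), given the interleaving chain
   of loads

     x_1 = b[0];   <- GROUP_FIRST_ELEMENT, place 0
     x_2 = b[1];   <- GROUP_GAP 1, place 1
     x_3 = b[3];   <- GROUP_GAP 2 (b[2] is skipped), place 3

   the function returns 0, 1 and 3 for the three stmts respectively,
   and -1 for any stmt whose chain does not start at FIRST_STMT.  */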


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  Return -1 if there was a fatal
   error, 1 if the error could be corrected by swapping operands of the
   operation, and 0 if everything is OK.  */
242
243 static int 
244 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
245                              gimple stmt, unsigned stmt_num,
246                              vec<slp_oprnd_info> *oprnds_info)
247 {
248   tree oprnd;
249   unsigned int i, number_of_oprnds;
250   tree def;
251   gimple def_stmt;
252   enum vect_def_type dt = vect_uninitialized_def;
253   struct loop *loop = NULL;
254   bool pattern = false;
255   slp_oprnd_info oprnd_info;
256   int first_op_idx = 1;
257   bool commutative = false;
258   bool first_op_cond = false;
259   bool first = stmt_num == 0;
260   bool second = stmt_num == 1;
261
262   if (loop_vinfo)
263     loop = LOOP_VINFO_LOOP (loop_vinfo);
264
265   if (is_gimple_call (stmt))
266     {
267       number_of_oprnds = gimple_call_num_args (stmt);
268       first_op_idx = 3;
269     }
270   else if (is_gimple_assign (stmt))
271     {
272       enum tree_code code = gimple_assign_rhs_code (stmt);
273       number_of_oprnds = gimple_num_ops (stmt) - 1;
274       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
275         {
276           first_op_cond = true;
277           commutative = true;
278           number_of_oprnds++;
279         }
280       else
281         commutative = commutative_tree_code (code);
282     }
283   else
284     return -1;
285
286   bool swapped = false;
287   for (i = 0; i < number_of_oprnds; i++)
288     {
289 again:
290       if (first_op_cond)
291         {
292           if (i == 0 || i == 1)
293             oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
294                                   swapped ? !i : i);
295           else
296             oprnd = gimple_op (stmt, first_op_idx + i - 1);
297         }
298       else
299         oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
300
301       oprnd_info = (*oprnds_info)[i];
302
303       if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
304                                &def, &dt))
305         {
306           if (dump_enabled_p ())
307             {
308               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309                                "Build SLP failed: can't analyze def for ");
310               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
311               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
312             }
313
314           return -1;
315         }
316
317       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
318          from the pattern.  Check that all the stmts of the node are in the
319          pattern.  */
320       if (def_stmt && gimple_bb (def_stmt)
321           && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
322               || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
323                   && gimple_code (def_stmt) != GIMPLE_PHI))
324           && vinfo_for_stmt (def_stmt)
325           && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
326           && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
327           && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
328         {
329           pattern = true;
330           if (!first && !oprnd_info->first_pattern
331               /* Allow different pattern state for the defs of the
332                  first stmt in reduction chains.  */
333               && (oprnd_info->first_dt != vect_reduction_def
334                   || (!second && !oprnd_info->second_pattern)))
335             {
336               if (i == 0
337                   && !swapped
338                   && commutative)
339                 {
340                   swapped = true;
341                   goto again;
342                 }
343
344               if (dump_enabled_p ())
345                 {
346                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
347                                    "Build SLP failed: some of the stmts"
348                                    " are in a pattern, and others are not ");
349                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
350                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
351                 }
352
353               return 1;
354             }
355
356           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
357           dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
358
359           if (dt == vect_unknown_def_type)
360             {
361               if (dump_enabled_p ())
362                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
363                                  "Unsupported pattern.\n");
364               return -1;
365             }
366
367           switch (gimple_code (def_stmt))
368             {
369               case GIMPLE_PHI:
370                 def = gimple_phi_result (def_stmt);
371                 break;
372
373               case GIMPLE_ASSIGN:
374                 def = gimple_assign_lhs (def_stmt);
375                 break;
376
377               default:
378                 if (dump_enabled_p ())
379                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
380                                    "unsupported defining stmt:\n");
381                 return -1;
382             }
383         }
384
385       if (second)
386         oprnd_info->second_pattern = pattern;
387
388       if (first)
389         {
390           oprnd_info->first_dt = dt;
391           oprnd_info->first_pattern = pattern;
392           oprnd_info->first_op_type = TREE_TYPE (oprnd);
393         }
394       else
395         {
396           /* Not first stmt of the group, check that the def-stmt/s match
397              the def-stmt/s of the first stmt.  Allow different definition
398              types for reduction chains: the first stmt must be a
399              vect_reduction_def (a phi node), and the rest
400              vect_internal_def.  */
401           if (((oprnd_info->first_dt != dt
402                 && !(oprnd_info->first_dt == vect_reduction_def
403                      && dt == vect_internal_def)
404                 && !((oprnd_info->first_dt == vect_external_def
405                       || oprnd_info->first_dt == vect_constant_def)
406                      && (dt == vect_external_def
407                          || dt == vect_constant_def)))
408                || !types_compatible_p (oprnd_info->first_op_type,
409                                        TREE_TYPE (oprnd))))
410             {
411               /* Try swapping operands if we got a mismatch.  */
412               if (i == 0
413                   && !swapped
414                   && commutative)
415                 {
416                   swapped = true;
417                   goto again;
418                 }
419
420               if (dump_enabled_p ())
421                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
422                                  "Build SLP failed: different types\n");
423
424               return 1;
425             }
426         }
427
428       /* Check the types of the definitions.  */
429       switch (dt)
430         {
431         case vect_constant_def:
432         case vect_external_def:
433         case vect_reduction_def:
434           break;
435
436         case vect_internal_def:
437           oprnd_info->def_stmts.quick_push (def_stmt);
438           break;
439
440         default:
441           /* FORNOW: Not supported.  */
442           if (dump_enabled_p ())
443             {
444               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
445                                "Build SLP failed: illegal type of def ");
446               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
447               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
448             }
449
450           return -1;
451         }
452     }
453
454   /* Swap operands.  */
455   if (swapped)
456     {
457       if (first_op_cond)
458         {
459           tree cond = gimple_assign_rhs1 (stmt);
460           swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
461                              &TREE_OPERAND (cond, 1));
462           TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
463         }
464       else
465         swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
466                            gimple_assign_rhs2_ptr (stmt));
467     }
468
469   return 0;
470 }
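
/* As an illustration of the operand swapping above (a made-up example,
   not taken from a testcase), consider the group

     _1 = b[0];
     a[0] = x_4 + _1;
     _2 = b[1];
     a[1] = _2 + x_4;

   For operand zero the first stmt records an external def (x_4) while
   the second stmt sees an internal def (the load result _2).  PLUS_EXPR
   is commutative, so the second stmt is re-checked with its operands
   swapped, and since that succeeds the swap is finally applied to the
   GIMPLE IL so that operand indexes stay canonical.  */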


/* Verify that the scalar stmts STMTS are isomorphic, do not require
   data permutation, and are not of unsupported types of operation.
   Return true if they are, otherwise return false and indicate in
   *MATCHES which stmts are not isomorphic to the first one.  If
   MATCHES[0] is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */
479
480 static bool
481 vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
482                        vec<gimple> stmts, unsigned int group_size,
483                        unsigned nops, unsigned int *max_nunits,
484                        unsigned int vectorization_factor, bool *matches,
485                        bool *two_operators)
486 {
487   unsigned int i;
488   gimple first_stmt = stmts[0], stmt = stmts[0];
489   enum tree_code first_stmt_code = ERROR_MARK;
490   enum tree_code alt_stmt_code = ERROR_MARK;
491   enum tree_code rhs_code = ERROR_MARK;
492   enum tree_code first_cond_code = ERROR_MARK;
493   tree lhs;
494   bool need_same_oprnds = false;
495   tree vectype, scalar_type, first_op1 = NULL_TREE;
496   optab optab;
497   int icode;
498   machine_mode optab_op2_mode;
499   machine_mode vec_mode;
500   struct data_reference *first_dr;
501   HOST_WIDE_INT dummy;
502   gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
503   tree cond;
504
505   /* For every stmt in NODE find its def stmt/s.  */
506   FOR_EACH_VEC_ELT (stmts, i, stmt)
507     {
508       matches[i] = false;
509
510       if (dump_enabled_p ())
511         {
512           dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
513           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
514           dump_printf (MSG_NOTE, "\n");
515         }
516
517       /* Fail to vectorize statements marked as unvectorizable.  */
518       if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
519         {
520           if (dump_enabled_p ())
521             {
522               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
523                                "Build SLP failed: unvectorizable statement ");
524               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
525               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
526             }
527           /* Fatal mismatch.  */
528           matches[0] = false;
529           return false;
530         }
531
532       lhs = gimple_get_lhs (stmt);
533       if (lhs == NULL_TREE)
534         {
535           if (dump_enabled_p ())
536             {
537               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
538                                "Build SLP failed: not GIMPLE_ASSIGN nor "
539                                "GIMPLE_CALL ");
540               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
541               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
542             }
543           /* Fatal mismatch.  */
544           matches[0] = false;
545           return false;
546         }
547
548        if (is_gimple_assign (stmt)
549            && gimple_assign_rhs_code (stmt) == COND_EXPR
550            && (cond = gimple_assign_rhs1 (stmt))
551            && !COMPARISON_CLASS_P (cond))
552         {
553           if (dump_enabled_p ())
554             {
555               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
556                                "Build SLP failed: condition is not "
557                                "comparison ");
558               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
559               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
560             }
561           /* Fatal mismatch.  */
562           matches[0] = false;
563           return false;
564         }
565
566       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
567       vectype = get_vectype_for_scalar_type (scalar_type);
568       if (!vectype)
569         {
570           if (dump_enabled_p ())
571             {
572               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
573                                "Build SLP failed: unsupported data-type ");
574               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
575                                  scalar_type);
576               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
577             }
578           /* Fatal mismatch.  */
579           matches[0] = false;
580           return false;
581         }
582
583       /* If populating the vector type requires unrolling then fail
584          before adjusting *max_nunits for basic-block vectorization.  */
585       if (bb_vinfo
586           && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
587         {
588           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
589                            "Build SLP failed: unrolling required "
590                            "in basic block SLP\n");
591           /* Fatal mismatch.  */
592           matches[0] = false;
593           return false;
594         }
595
596       /* In case of multiple types we need to detect the smallest type.  */
597       if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
598         {
599           *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
600           if (bb_vinfo)
601             vectorization_factor = *max_nunits;
602         }
603
604       if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
605         {
606           rhs_code = CALL_EXPR;
607           if (gimple_call_internal_p (call_stmt)
608               || gimple_call_tail_p (call_stmt)
609               || gimple_call_noreturn_p (call_stmt)
610               || !gimple_call_nothrow_p (call_stmt)
611               || gimple_call_chain (call_stmt))
612             {
613               if (dump_enabled_p ())
614                 {
615                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
616                                    "Build SLP failed: unsupported call type ");
617                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
618                                     call_stmt, 0);
619                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
620                 }
621               /* Fatal mismatch.  */
622               matches[0] = false;
623               return false;
624             }
625         }
626       else
627         rhs_code = gimple_assign_rhs_code (stmt);
628
629       /* Check the operation.  */
630       if (i == 0)
631         {
632           first_stmt_code = rhs_code;
633
634           /* Shift arguments should be equal in all the packed stmts for a
635              vector shift with scalar shift operand.  */
636           if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
637               || rhs_code == LROTATE_EXPR
638               || rhs_code == RROTATE_EXPR)
639             {
640               vec_mode = TYPE_MODE (vectype);
641
642               /* First see if we have a vector/vector shift.  */
643               optab = optab_for_tree_code (rhs_code, vectype,
644                                            optab_vector);
645
646               if (!optab
647                   || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
648                 {
649                   /* No vector/vector shift, try for a vector/scalar shift.  */
650                   optab = optab_for_tree_code (rhs_code, vectype,
651                                                optab_scalar);
652
653                   if (!optab)
654                     {
655                       if (dump_enabled_p ())
656                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
657                                          "Build SLP failed: no optab.\n");
658                       /* Fatal mismatch.  */
659                       matches[0] = false;
660                       return false;
661                     }
662                   icode = (int) optab_handler (optab, vec_mode);
663                   if (icode == CODE_FOR_nothing)
664                     {
665                       if (dump_enabled_p ())
666                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
667                                          "Build SLP failed: "
668                                          "op not supported by target.\n");
669                       /* Fatal mismatch.  */
670                       matches[0] = false;
671                       return false;
672                     }
673                   optab_op2_mode = insn_data[icode].operand[2].mode;
674                   if (!VECTOR_MODE_P (optab_op2_mode))
675                     {
676                       need_same_oprnds = true;
677                       first_op1 = gimple_assign_rhs2 (stmt);
678                     }
679                 }
680             }
681           else if (rhs_code == WIDEN_LSHIFT_EXPR)
682             {
683               need_same_oprnds = true;
684               first_op1 = gimple_assign_rhs2 (stmt);
685             }
686         }
687       else
688         {
689           if (first_stmt_code != rhs_code
690               && alt_stmt_code == ERROR_MARK)
691             alt_stmt_code = rhs_code;
692           if (first_stmt_code != rhs_code
693               && (first_stmt_code != IMAGPART_EXPR
694                   || rhs_code != REALPART_EXPR)
695               && (first_stmt_code != REALPART_EXPR
696                   || rhs_code != IMAGPART_EXPR)
697               /* Handle mismatches in plus/minus by computing both
698                  and merging the results.  */
699               && !((first_stmt_code == PLUS_EXPR
700                     || first_stmt_code == MINUS_EXPR)
701                    && (alt_stmt_code == PLUS_EXPR
702                        || alt_stmt_code == MINUS_EXPR)
703                    && rhs_code == alt_stmt_code)
704               && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
705                    && (first_stmt_code == ARRAY_REF
706                        || first_stmt_code == BIT_FIELD_REF
707                        || first_stmt_code == INDIRECT_REF
708                        || first_stmt_code == COMPONENT_REF
709                        || first_stmt_code == MEM_REF)))
710             {
711               if (dump_enabled_p ())
712                 {
713                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
714                                    "Build SLP failed: different operation "
715                                    "in stmt ");
716                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
717                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
718                                    "original stmt ");
719                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
720                                     first_stmt, 0);
721                 }
722               /* Mismatch.  */
723               continue;
724             }
725
726           if (need_same_oprnds
727               && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
728             {
729               if (dump_enabled_p ())
730                 {
731                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
732                                    "Build SLP failed: different shift "
733                                    "arguments in ");
734                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
735                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
736                 }
737               /* Mismatch.  */
738               continue;
739             }
740
741           if (rhs_code == CALL_EXPR)
742             {
743               gimple first_stmt = stmts[0];
744               if (gimple_call_num_args (stmt) != nops
745                   || !operand_equal_p (gimple_call_fn (first_stmt),
746                                        gimple_call_fn (stmt), 0)
747                   || gimple_call_fntype (first_stmt)
748                      != gimple_call_fntype (stmt))
749                 {
750                   if (dump_enabled_p ())
751                     {
752                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
753                                        "Build SLP failed: different calls in ");
754                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
755                                         stmt, 0);
756                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
757                     }
758                   /* Mismatch.  */
759                   continue;
760                 }
761             }
762         }
763
764       /* Grouped store or load.  */
765       if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
766         {
767           if (REFERENCE_CLASS_P (lhs))
768             {
769               /* Store.  */
770               ;
771             }
772           else
773             {
774               /* Load.  */
775               unsigned unrolling_factor
776                 = least_common_multiple
777                     (*max_nunits, group_size) / group_size;
778               /* FORNOW: Check that there is no gap between the loads
779                  and no gap between the groups when we need to load
780                  multiple groups at once.
781                  ???  We should enhance this to only disallow gaps
782                  inside vectors.  */
783               if ((unrolling_factor > 1
784                    && ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
785                         && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
786                        /* If the group is split up then GROUP_GAP
787                           isn't correct here, nor is GROUP_FIRST_ELEMENT.  */
788                        || GROUP_SIZE (vinfo_for_stmt (stmt)) > group_size))
789                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
790                       && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
791                 {
792                   if (dump_enabled_p ())
793                     {
794                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
795                                        "Build SLP failed: grouped "
796                                        "loads have gaps ");
797                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
798                                         stmt, 0);
799                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
800                     }
801                   /* Fatal mismatch.  */
802                   matches[0] = false;
803                   return false;
804                 }
805
806               /* Check that the size of interleaved loads group is not
807                  greater than the SLP group size.  */
808               unsigned ncopies
809                 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
810               if (loop_vinfo
811                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
812                   && ((GROUP_SIZE (vinfo_for_stmt (stmt))
813                        - GROUP_GAP (vinfo_for_stmt (stmt)))
814                       > ncopies * group_size))
815                 {
816                   if (dump_enabled_p ())
817                     {
818                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
819                                        "Build SLP failed: the number "
820                                        "of interleaved loads is greater than "
821                                        "the SLP group size ");
822                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
823                                         stmt, 0);
824                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
825                     }
826                   /* Fatal mismatch.  */
827                   matches[0] = false;
828                   return false;
829                 }
830
831               old_first_load = first_load;
832               first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
833               if (prev_first_load)
834                 {
835                   /* Check that there are no loads from different interleaving
836                      chains in the same node.  */
837                   if (prev_first_load != first_load)
838                     {
839                       if (dump_enabled_p ())
840                         {
841                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
842                                            vect_location, 
843                                            "Build SLP failed: different "
844                                            "interleaving chains in one node ");
845                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
846                                             stmt, 0);
847                           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
848                         }
849                       /* Mismatch.  */
850                       continue;
851                     }
852                 }
853               else
854                 prev_first_load = first_load;
855
856               /* In some cases a group of loads is just the same load
857                  repeated N times.  Only analyze its cost once.  */
858               if (first_load == stmt && old_first_load != first_load)
859                 {
860                   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
861                   if (vect_supportable_dr_alignment (first_dr, false)
862                       == dr_unaligned_unsupported)
863                     {
864                       if (dump_enabled_p ())
865                         {
866                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
867                                            vect_location, 
868                                            "Build SLP failed: unsupported "
869                                            "unaligned load ");
870                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
871                                             stmt, 0);
872                           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
873                         }
874                       /* Fatal mismatch.  */
875                       matches[0] = false;
876                       return false;
877                     }
878                 }
879            }
880         } /* Grouped access.  */
881       else
882         {
883           if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
884             {
885               /* Not grouped load.  */
886               if (dump_enabled_p ())
887                 {
888                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
889                                    "Build SLP failed: not grouped load ");
890                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
891                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
892                 }
893
              /* FORNOW: Non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
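
/* The *TWO_OPERATORS output above permits mixed plus/minus groups such
   as (a made-up example)

     a[0] = b[0] + c[0];
     a[1] = b[1] - c[1];
     a[2] = b[2] + c[2];
     a[3] = b[3] - c[3];

   Both operations are then performed on full vectors and the lanes are
   combined with a permute whose selector picks lane i from the first
   result where stmt i uses FIRST_STMT_CODE and from the second result
   otherwise; that is exactly the selector can_vec_perm_p is queried
   with above.  */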

/* Recursively build an SLP tree starting from NODE.
   Fail (and return false) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation; in that case
   *MATCHES records which stmts failed to match the first one.
   Otherwise, return true.  */
995
996 static bool
997 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
998                      slp_tree *node, unsigned int group_size,
999                      unsigned int *max_nunits,
1000                      vec<slp_tree> *loads,
1001                      unsigned int vectorization_factor,
1002                      bool *matches, unsigned *npermutes, unsigned *tree_size,
1003                      unsigned max_tree_size)
1004 {
1005   unsigned nops, i, this_tree_size = 0;
1006   gimple stmt;
1007
1008   matches[0] = false;
1009
1010   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
1011   if (is_gimple_call (stmt))
1012     nops = gimple_call_num_args (stmt);
1013   else if (is_gimple_assign (stmt))
1014     {
1015       nops = gimple_num_ops (stmt) - 1;
1016       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1017         nops++;
1018     }
1019   else
1020     return false;
1021
1022   bool two_operators = false;
1023   if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
1024                               SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
1025                               max_nunits, vectorization_factor, matches,
1026                               &two_operators))
1027     return false;
1028   SLP_TREE_TWO_OPERATORS (*node) = two_operators;
1029
1030   /* If the SLP node is a load, terminate the recursion.  */
1031   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
1032       && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1033     {
1034       loads->safe_push (*node);
1035       return true;
1036     }
1037
1038   /* Get at the operands, verifying they are compatible.  */
1039   vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1040   slp_oprnd_info oprnd_info;
1041   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
1042     {
1043       switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
1044                                            stmt, i, &oprnds_info))
1045         {
1046         case 0:
1047           break;
1048         case -1:
1049           matches[0] = false;
1050           vect_free_oprnd_info (oprnds_info);
1051           return false;
1052         case 1:
1053           matches[i] = false;
1054           break;
1055         }
1056     }
1057   for (i = 0; i < group_size; ++i)
1058     if (!matches[i])
1059       {
1060         vect_free_oprnd_info (oprnds_info);
1061         return false;
1062       }
1063
1064   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
1065
1066   /* Create SLP_TREE nodes for the definition node/s.  */
1067   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1068     {
1069       slp_tree child;
1070       unsigned old_nloads = loads->length ();
1071       unsigned old_max_nunits = *max_nunits;
1072
1073       if (oprnd_info->first_dt != vect_internal_def)
1074         continue;
1075
1076       if (++this_tree_size > max_tree_size)
1077         {
1078           vect_free_oprnd_info (oprnds_info);
1079           return false;
1080         }
1081
1082       child = vect_create_new_slp_node (oprnd_info->def_stmts);
1083       if (!child)
1084         {
1085           vect_free_oprnd_info (oprnds_info);
1086           return false;
1087         }
1088
1089       if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1090                                group_size, max_nunits, loads,
1091                                vectorization_factor, matches,
1092                                npermutes, &this_tree_size, max_tree_size))
1093         {
          /* If we have all children of CHILD built up from scalars then just
             throw that away and build this node up from scalars instead.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ())
            {
              unsigned int j;
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (grandchild != NULL)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  *max_nunits = old_max_nunits;
                  loads->truncate (old_nloads);
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  vect_free_slp_tree (child);
                  SLP_TREE_CHILDREN (*node).quick_push (NULL);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ???  This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (bb_vinfo
          && !matches[0]
          /* ???  Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          SLP_TREE_CHILDREN (*node).quick_push (NULL);
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && !SLP_TREE_TWO_OPERATORS (*node)
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   tem, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  */
              for (j = 0; j < group_size; ++j)
                if (!matches[j])
                  {
                    gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                  }
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}
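
/* As an illustration (a made-up example), for the group of stores

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   the root node holds the two stores, its single child holds the two
   additions, and that child has two children holding the loads from
   b[] and from c[].  The load nodes terminate the recursion and are
   collected in LOADS; a NULL child stands for an operand vector that
   will be built up from scalars.  */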

/* Dump an SLP tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}
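
/* Judging from the dump_printf calls above, the output is of the form

     node
        stmt 0 <stmt>
        stmt 1 <stmt>

   repeated recursively for each child node of the tree.  */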


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
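
/* Note the direction of the rearrangement: the stmt at index I moves to
   position PERMUTATION[I].  E.g. for GROUP_SIZE 4 and permutation
   {2, 0, 3, 1}, the stmts {s0, s1, s2, s3} become {s1, s3, s0, s2}.  */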


/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      return true;
    }

1509   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1510      GROUP_SIZE, where each sequence of accesses to the same dr is also of
1511      GROUP_SIZE length (unless it's a reduction).  */
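  /* Illustrative example: with GROUP_SIZE == 2, two load nodes whose
     permutations are {0, 0} and {1, 1} match this pattern -- each node
     replicates a single element of the interleaving chain, as can
     happen e.g. in complex-number computations.  */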
1512   if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
1513     return false;
1514   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1515     if (!node->load_permutation.exists ())
1516       return false;
1517
1518   load_index = sbitmap_alloc (group_size);
1519   bitmap_clear (load_index);
1520   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1521     {
1522       unsigned int lidx = node->load_permutation[0];
1523       if (bitmap_bit_p (load_index, lidx))
1524         {
1525           sbitmap_free (load_index);
1526           return false;
1527         }
1528       bitmap_set_bit (load_index, lidx);
1529       FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1530         if (k != lidx)
1531           {
1532             sbitmap_free (load_index);
1533             return false;
1534           }
1535     }
1536   for (i = 0; i < group_size; i++)
1537     if (!bitmap_bit_p (load_index, i))
1538       {
1539         sbitmap_free (load_index);
1540         return false;
1541       }
1542   sbitmap_free (load_index);
1543
1544   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1545     if (node->load_permutation.exists ()
1546         && !vect_transform_slp_perm_load
1547               (node, vNULL, NULL,
1548                SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1549       return false;
1550   return true;
1551 }
1552
1553
1554 /* Find the last scalar stmt in the SLP tree NODE.  */
1555
1556 static gimple
1557 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1558 {
1559   gimple last = NULL, stmt;
1560
1561   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1562     {
1563       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1564       if (is_pattern_stmt_p (stmt_vinfo))
1565         last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1566       else
1567         last = get_later_stmt (stmt, last);
1568     }
1569
1570   return last;
1571 }
1572
1573 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1574
1575 static void
1576 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1577                          stmt_vector_for_cost *prologue_cost_vec,
1578                          stmt_vector_for_cost *body_cost_vec,
1579                          unsigned ncopies_for_cost)
1580 {
1581   unsigned i;
1582   slp_tree child;
1583   gimple stmt, s;
1584   stmt_vec_info stmt_info;
1585   tree lhs;
1586   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1587
1588   /* Recurse down the SLP tree.  */
1589   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1590     if (child)
1591       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1592                                body_cost_vec, ncopies_for_cost);
1593
1594   /* Look at the first scalar stmt to determine the cost.  */
1595   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1596   stmt_info = vinfo_for_stmt (stmt);
1597   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1598     {
1599       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1600         vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1601                                vect_uninitialized_def,
1602                                node, prologue_cost_vec, body_cost_vec);
1603       else
1604         {
1605           int i;
1606           gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1607           vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1608                                 node, prologue_cost_vec, body_cost_vec);
1609           /* If the load is permuted record the cost for the permutation.
1610              ???  Loads from multiple chains are let through here only
1611              for a single special case involving complex numbers where
1612              in the end no permutation is necessary.  */
1613           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1614             if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1615                  == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1616                 && vect_get_place_in_interleaving_chain
1617                      (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1618               {
1619                 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1620                                   stmt_info, 0, vect_body);
1621                 break;
1622               }
1623         }
1624     }
1625   else
1626     {
1627       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1628                         stmt_info, 0, vect_body);
1629       if (SLP_TREE_TWO_OPERATORS (node))
1630         {
1631           record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1632                             stmt_info, 0, vect_body);
1633           record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1634                             stmt_info, 0, vect_body);
1635         }
1636     }
1637
1638   /* Scan operands and account for prologue cost of constants/externals.
1639      ???  This over-estimates cost for multiple uses and should be
1640      re-engineered.  */
1641   lhs = gimple_get_lhs (stmt);
1642   for (i = 0; i < gimple_num_ops (stmt); ++i)
1643     {
1644       tree def, op = gimple_op (stmt, i);
1645       gimple def_stmt;
1646       enum vect_def_type dt;
1647       if (!op || op == lhs)
1648         continue;
1649       if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
1650                               STMT_VINFO_BB_VINFO (stmt_info),
1651                               &def_stmt, &def, &dt))
1652         {
1653           /* Without looking at the actual initializer a vector of
1654              constants can be implemented as load from the constant pool.
1655              ???  We need to pass down stmt_info for a vector type
1656              even if it points to the wrong stmt.  */
1657           if (dt == vect_constant_def)
1658             record_stmt_cost (prologue_cost_vec, 1, vector_load,
1659                               stmt_info, 0, vect_prologue);
1660           else if (dt == vect_external_def)
1661             record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1662                               stmt_info, 0, vect_prologue);
1663         }
1664     }
1665 }
1666
1667 /* Compute the cost for the SLP instance INSTANCE.  */
1668
1669 static void
1670 vect_analyze_slp_cost (slp_instance instance, void *data)
1671 {
1672   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1673   unsigned ncopies_for_cost;
1674   stmt_info_for_cost *si;
1675   unsigned i;
1676
1677   /* Calculate the number of vector stmts to create based on the unrolling
1678      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1679      GROUP_SIZE / NUNITS otherwise).  */
1680   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1681   slp_tree node = SLP_INSTANCE_TREE (instance);
1682   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1683   /* Adjust the group_size by the vectorization factor which is always one
1684      for basic-block vectorization.  */
1685   if (STMT_VINFO_LOOP_VINFO (stmt_info))
1686     group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1687   unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1688   /* For reductions look at a reduction operand in case the reduction
1689      operation is widening like DOT_PROD or SAD.  */
1690   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1691     {
1692       gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1693       switch (gimple_assign_rhs_code (stmt))
1694         {
1695         case DOT_PROD_EXPR:
1696         case SAD_EXPR:
1697           nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1698                                 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1699           break;
1700         default:;
1701         }
1702     }
1703   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
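  /* Worked example with made-up numbers: for a group of 8 stmts and
     V4SI vectors (nunits == 4), least_common_multiple (4, 8) == 8, so
     ncopies_for_cost == 8 / 4 == 2 vector stmts.  For a widening
     DOT_PROD or SAD over char inputs the operand vector type
     (e.g. V16QI, nunits == 16) supplies NUNITS instead.  */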
1704
1705   prologue_cost_vec.create (10);
1706   body_cost_vec.create (10);
1707   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1708                            &prologue_cost_vec, &body_cost_vec,
1709                            ncopies_for_cost);
1710
1711   /* Record the prologue costs, which were delayed until we were
1712      sure that SLP was successful.  */
1713   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1714     {
1715       struct _stmt_vec_info *stmt_info
1716         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1717       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1718                             si->misalign, vect_prologue);
1719     }
1720
1721   /* Record the instance's instructions in the target cost model.  */
1722   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1723     {
1724       struct _stmt_vec_info *stmt_info
1725         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1726       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1727                             si->misalign, vect_body);
1728     }
1729
1730   prologue_cost_vec.release ();
1731   body_cost_vec.release ();
1732 }
1733
1734 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1735    vect_build_slp_tree to build a tree of packed stmts if possible.
1736    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1737
1738 static bool
1739 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1740                            gimple stmt, unsigned max_tree_size)
1741 {
1742   slp_instance new_instance;
1743   slp_tree node;
1744   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1745   unsigned int unrolling_factor = 1, nunits;
1746   tree vectype, scalar_type = NULL_TREE;
1747   gimple next;
1748   unsigned int vectorization_factor = 0;
1749   int i;
1750   unsigned int max_nunits = 0;
1751   vec<slp_tree> loads;
1752   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1753   vec<gimple> scalar_stmts;
1754
1755   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1756     {
1757       if (dr)
1758         {
1759           scalar_type = TREE_TYPE (DR_REF (dr));
1760           vectype = get_vectype_for_scalar_type (scalar_type);
1761         }
1762       else
1763         {
1764           gcc_assert (loop_vinfo);
1765           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1766         }
1767
1768       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1769     }
1770   else
1771     {
1772       gcc_assert (loop_vinfo);
1773       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1774       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1775     }
1776
1777   if (!vectype)
1778     {
1779       if (dump_enabled_p ())
1780         {
1781           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1782                            "Build SLP failed: unsupported data-type ");
1783           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1784           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1785         }
1786
1787       return false;
1788     }
1789
1790   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1791   if (loop_vinfo)
1792     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1793   else
1794     vectorization_factor = nunits;
1795
1796   /* Calculate the unrolling factor.  */
1797   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
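  /* Worked example (hypothetical values): with nunits == 4 and
     group_size == 6, least_common_multiple (4, 6) == 12 and
     unrolling_factor == 12 / 6 == 2 -- the group must be repeated
     twice before whole vectors can be formed.  */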
1798   if (unrolling_factor != 1 && !loop_vinfo)
1799     {
1800       if (dump_enabled_p ())
1801         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1802                          "Build SLP failed: unrolling required in basic"
1803                          " block SLP\n");
1804
1805       return false;
1806     }
1807
1808   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1809   scalar_stmts.create (group_size);
1810   next = stmt;
1811   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1812     {
1813       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1814       while (next)
1815         {
1816           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1817               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1818             scalar_stmts.safe_push (
1819                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1820           else
1821             scalar_stmts.safe_push (next);
1822           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1823         }
1824       /* Mark the first element of the reduction chain as reduction to properly
1825          transform the node.  In the reduction analysis phase only the last
1826          element of the chain is marked as reduction.  */
1827       if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1828         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1829     }
1830   else
1831     {
1832       /* Collect reduction statements.  */
1833       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1834       for (i = 0; reductions.iterate (i, &next); i++)
1835         scalar_stmts.safe_push (next);
1836     }
1837
1838   node = vect_create_new_slp_node (scalar_stmts);
1839
1840   loads.create (group_size);
1841
1842   /* Build the tree for the SLP instance.  */
1843   bool *matches = XALLOCAVEC (bool, group_size);
1844   unsigned npermutes = 0;
1845   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1846                            &max_nunits, &loads,
1847                            vectorization_factor, matches, &npermutes, NULL,
1848                            max_tree_size))
1849     {
1850       /* Calculate the unrolling factor based on the smallest type.  */
1851       if (max_nunits > nunits)
1852         unrolling_factor = least_common_multiple (max_nunits, group_size)
1853                            / group_size;
1854
1855       if (unrolling_factor != 1 && !loop_vinfo)
1856         {
1857           if (dump_enabled_p ())
1858             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1859                              "Build SLP failed: unrolling required in basic"
1860                              " block SLP\n");
1861           vect_free_slp_tree (node);
1862           loads.release ();
1863           return false;
1864         }
1865
1866       /* Create a new SLP instance.  */
1867       new_instance = XNEW (struct _slp_instance);
1868       SLP_INSTANCE_TREE (new_instance) = node;
1869       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1870       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1871       SLP_INSTANCE_LOADS (new_instance) = loads;
1872
1873       /* Compute the load permutation.  */
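      /* For instance (illustrative): if a node's scalar stmts load
         b[i+1] and then b[i] while the interleaving chain starts at
         b[i], the places in the chain are 1 and 0, so load_permutation
         becomes {1, 0} and the node is recorded as permuted.  */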
1874       slp_tree load_node;
1875       bool loads_permuted = false;
1876       FOR_EACH_VEC_ELT (loads, i, load_node)
1877         {
1878           vec<unsigned> load_permutation;
1879           int j;
1880           gimple load, first_stmt;
1881           bool this_load_permuted = false;
1882           load_permutation.create (group_size);
1883           first_stmt = GROUP_FIRST_ELEMENT
1884               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1885           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1886             {
1887               int load_place
1888                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1889               gcc_assert (load_place != -1);
1890               if (load_place != j)
1891                 this_load_permuted = true;
1892               load_permutation.safe_push (load_place);
1893             }
1894           if (!this_load_permuted)
1895             {
1896               load_permutation.release ();
1897               continue;
1898             }
1899           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1900           loads_permuted = true;
1901         }
1902
1903       if (loads_permuted)
1904         {
1905           if (!vect_supported_load_permutation_p (new_instance))
1906             {
1907               if (dump_enabled_p ())
1908                 {
1909                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1910                                    "Build SLP failed: unsupported load "
1911                                    "permutation ");
1912                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1913                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1914                 }
1915               vect_free_slp_instance (new_instance);
1916               return false;
1917             }
1918         }
1919
1920
1921       if (loop_vinfo)
1922         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1923       else
1924         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1925
1926       if (dump_enabled_p ())
1927         vect_print_slp_tree (MSG_NOTE, node);
1928
1929       return true;
1930     }
1931
1932   /* Failed to SLP.  */
1933   /* Free the allocated memory.  */
1934   vect_free_slp_tree (node);
1935   loads.release ();
1936
1937   return false;
1938 }
1939
1940
1941 /* Check if there are stmts in the loop that can be vectorized using SLP.  Build
1942    trees of packed scalar stmts if SLP is possible.  */
1943
1944 bool
1945 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1946                   unsigned max_tree_size)
1947 {
1948   unsigned int i;
1949   vec<gimple> grouped_stores;
1950   vec<gimple> reductions = vNULL;
1951   vec<gimple> reduc_chains = vNULL;
1952   gimple first_element;
1953   bool ok = false;
1954
1955   if (dump_enabled_p ())
1956     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1957
1958   if (loop_vinfo)
1959     {
1960       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1961       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1962       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1963     }
1964   else
1965     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1966
1967   /* Find SLP sequences starting from groups of grouped stores.  */
1968   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1969     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1970                                    max_tree_size))
1971       ok = true;
1972
1973   if (reduc_chains.length () > 0)
1974     {
1975       /* Find SLP sequences starting from reduction chains.  */
1976       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1977         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1978                                        max_tree_size))
1979           ok = true;
1980         else
1981           return false;
1982
1983       /* Don't try to vectorize SLP reductions if reduction chain was
1984          detected.  */
1985       return ok;
1986     }
1987
1988   /* Find SLP sequences starting from groups of reductions.  */
1989   if (reductions.length () > 1
1990       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1991                                     max_tree_size))
1992     ok = true;
1993
1994   return true;
1995 }
1996
1997
1998 /* For each possible SLP instance decide whether to SLP it and calculate the
1999    overall unrolling factor needed to SLP the loop.  Return TRUE if decided
2000    to SLP at least one instance.  */
2001
2002 bool
2003 vect_make_slp_decision (loop_vec_info loop_vinfo)
2004 {
2005   unsigned int i, unrolling_factor = 1;
2006   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2007   slp_instance instance;
2008   int decided_to_slp = 0;
2009
2010   if (dump_enabled_p ())
2011     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2012                      "\n");
2013
2014   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2015     {
2016       /* FORNOW: SLP if you can.  */
2017       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2018         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2019
2020       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
2021          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2022          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
2023       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2024       decided_to_slp++;
2025     }
2026
2027   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2028
2029   if (decided_to_slp && dump_enabled_p ())
2030     dump_printf_loc (MSG_NOTE, vect_location,
2031                      "Decided to SLP %d instances. Unrolling factor %d\n",
2032                      decided_to_slp, unrolling_factor);
2033
2034   return (decided_to_slp > 0);
2035 }
2036
2037
2038 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2039    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
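/* Illustrative scenario (hypothetical stmts): a pure-SLP statement
   x_1 = a[i_2] + 1 whose result also feeds a loop-vectorized
   reduction sum_3 = sum_2 + x_1 must be vectorized both ways, and is
   therefore marked HYBRID.  */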
2040
2041 static void
2042 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2043 {
2044   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2045   imm_use_iterator imm_iter;
2046   gimple use_stmt;
2047   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2048   slp_tree child;
2049   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2050   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2051   int j;
2052
2053   /* Propagate hybrid down the SLP tree.  */
2054   if (stype == hybrid)
2055     ;
2056   else if (HYBRID_SLP_STMT (stmt_vinfo))
2057     stype = hybrid;
2058   else
2059     {
2060       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
2061       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2062       /* We always get the pattern stmt here, but for immediate
2063          uses we have to use the LHS of the original stmt.  */
2064       gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2065       if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
2066         stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2067       if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2068         FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
2069           {
2070             if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2071               continue;
2072             use_vinfo = vinfo_for_stmt (use_stmt);
2073             if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2074                 && STMT_VINFO_RELATED_STMT (use_vinfo))
2075               use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2076             if (!STMT_SLP_TYPE (use_vinfo)
2077                 && (STMT_VINFO_RELEVANT (use_vinfo)
2078                     || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2079                 && !(gimple_code (use_stmt) == GIMPLE_PHI
2080                      && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2081               stype = hybrid;
2082           }
2083     }
2084
2085   if (stype == hybrid)
2086     {
2087       if (dump_enabled_p ())
2088         {
2089           dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2090           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2091         }
2092       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2093     }
2094
2095   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2096     if (child)
2097       vect_detect_hybrid_slp_stmts (child, i, stype);
2098 }
2099
2100 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2101
2102 static tree
2103 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2104 {
2105   walk_stmt_info *wi = (walk_stmt_info *)data;
2106   struct loop *loopp = (struct loop *)wi->info;
2107
2108   if (wi->is_lhs)
2109     return NULL_TREE;
2110
2111   if (TREE_CODE (*tp) == SSA_NAME
2112       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2113     {
2114       gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
2115       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2116           && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2117         {
2118           if (dump_enabled_p ())
2119             {
2120               dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2121               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2122             }
2123           STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2124         }
2125     }
2126
2127   return NULL_TREE;
2128 }
2129
2130 static tree
2131 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2132                           walk_stmt_info *)
2133 {
2134   /* If the stmt is in an SLP instance then this isn't a reason
2135      to mark use definitions in other SLP instances as hybrid.  */
2136   if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2137     *handled = true;
2138   return NULL_TREE;
2139 }
2140
2141 /* Find stmts that must be both vectorized and SLPed.  */
2142
2143 void
2144 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2145 {
2146   unsigned int i;
2147   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2148   slp_instance instance;
2149
2150   if (dump_enabled_p ())
2151     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2152                      "\n");
2153
2154   /* First walk all pattern stmts in the loop and mark defs of their uses
2155      as hybrid, because immediate uses in pattern stmts are not recorded.  */
2156   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2157     {
2158       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2159       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2160            gsi_next (&gsi))
2161         {
2162           gimple stmt = gsi_stmt (gsi);
2163           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2164           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2165             {
2166               walk_stmt_info wi;
2167               memset (&wi, 0, sizeof (wi));
2168               wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2169               gimple_stmt_iterator gsi2
2170                 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2171               walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2172                                 vect_detect_hybrid_slp_1, &wi);
2173               walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2174                                vect_detect_hybrid_slp_2,
2175                                vect_detect_hybrid_slp_1, &wi);
2176             }
2177         }
2178     }
2179
2180   /* Then walk the SLP instance trees marking stmts with uses in
2181      non-SLP stmts as hybrid, also propagating hybrid down the
2182      SLP tree, collecting the above info on-the-fly.  */
2183   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2184     {
2185       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2186         vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2187                                       i, pure_slp);
2188     }
2189 }
2190
2191
2192 /* Create and initialize a new bb_vec_info struct for BB, as well as
2193    stmt_vec_info structs for all the stmts in it.  */
2194
2195 static bb_vec_info
2196 new_bb_vec_info (basic_block bb)
2197 {
2198   bb_vec_info res = NULL;
2199   gimple_stmt_iterator gsi;
2200
2201   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2202   BB_VINFO_BB (res) = bb;
2203
2204   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2205     {
2206       gimple stmt = gsi_stmt (gsi);
2207       gimple_set_uid (stmt, 0);
2208       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
2209     }
2210
2211   BB_VINFO_GROUPED_STORES (res).create (10);
2212   BB_VINFO_SLP_INSTANCES (res).create (2);
2213   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2214
2215   bb->aux = res;
2216   return res;
2217 }
2218
2219
2220 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2221    stmts in the basic block.  */
2222
2223 static void
2224 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2225 {
2226   vec<slp_instance> slp_instances;
2227   slp_instance instance;
2228   basic_block bb;
2229   gimple_stmt_iterator si;
2230   unsigned i;
2231
2232   if (!bb_vinfo)
2233     return;
2234
2235   bb = BB_VINFO_BB (bb_vinfo);
2236
2237   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2238     {
2239       gimple stmt = gsi_stmt (si);
2240       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2241
2242       if (stmt_info)
2243         /* Free stmt_vec_info.  */
2244         free_stmt_vec_info (stmt);
2245     }
2246
2247   vect_destroy_datarefs (NULL, bb_vinfo);
2248   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2249   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2250   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2251   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2252     vect_free_slp_instance (instance);
2253   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2254   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2255   free (bb_vinfo);
2256   bb->aux = NULL;
2257 }
2258
2259
2260 /* Analyze statements contained in the SLP tree NODE after recursively
2261    analyzing the subtree.  Return TRUE if the operations are supported.  */
2262
2263 static bool
2264 vect_slp_analyze_node_operations (slp_tree node)
2265 {
2266   bool dummy;
2267   int i;
2268   gimple stmt;
2269   slp_tree child;
2270
2271   if (!node)
2272     return true;
2273
2274   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2275     if (!vect_slp_analyze_node_operations (child))
2276       return false;
2277
2278   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2279     {
2280       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2281       gcc_assert (stmt_info);
2282       gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2283
2284       if (!vect_analyze_stmt (stmt, &dummy, node))
2285         return false;
2286     }
2287
2288   return true;
2289 }
2290
2291
2292 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
2293    operations are supported.  */
2294
2295 bool
2296 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2297 {
2298   slp_instance instance;
2299   int i;
2300
2301   if (dump_enabled_p ())
2302     dump_printf_loc (MSG_NOTE, vect_location,
2303                      "=== vect_slp_analyze_operations ===\n");
2304
2305   for (i = 0; slp_instances.iterate (i, &instance); )
2306     {
2307       if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2308         {
2309           dump_printf_loc (MSG_NOTE, vect_location,
2310                            "removing SLP instance operations starting from: ");
2311           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2312                             SLP_TREE_SCALAR_STMTS
2313                               (SLP_INSTANCE_TREE (instance))[0], 0);
2314           vect_free_slp_instance (instance);
2315           slp_instances.ordered_remove (i);
2316         }
2317       else
2318         {
2319           /* Compute the costs of the SLP instance.  */
2320           vect_analyze_slp_cost (instance, data);
2321           i++;
2322         }
2323     }
2324
2325   if (!slp_instances.length ())
2326     return false;
2327
2328   return true;
2329 }
2330
2331
2332 /* Compute the scalar cost of the SLP node NODE and its children
2333    and return it.  Do not account defs that are marked in LIFE and
2334    update LIFE according to uses of NODE.  */
2335
2336 static unsigned
2337 vect_bb_slp_scalar_cost (basic_block bb,
2338                          slp_tree node, vec<bool, va_heap> *life)
2339 {
2340   unsigned scalar_cost = 0;
2341   unsigned i;
2342   gimple stmt;
2343   slp_tree child;
2344
2345   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2346     {
2347       unsigned stmt_cost;
2348       ssa_op_iter op_iter;
2349       def_operand_p def_p;
2350       stmt_vec_info stmt_info;
2351
2352       if ((*life)[i])
2353         continue;
2354
2355       /* If there is a non-vectorized use of the defs then the scalar
2356          stmt is kept live in which case we do not account it or any
2357          required defs in the SLP children in the scalar cost.  This
2358          way we make the vectorization more costly when compared to
2359          the scalar cost.  */
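      /* E.g. (illustrative): if t_1 = a[i_2] + 1 is in the SLP tree
         but t_1 is also used by a stmt that will not be vectorized,
         t_1 stays live in scalar form and neither it nor the defs it
         needs are counted into the scalar cost.  */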
2360       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2361         {
2362           imm_use_iterator use_iter;
2363           gimple use_stmt;
2364           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2365             if (!is_gimple_debug (use_stmt)
2366                 && (gimple_code (use_stmt) == GIMPLE_PHI
2367                     || gimple_bb (use_stmt) != bb
2368                     || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2369               {
2370                 (*life)[i] = true;
2371                 BREAK_FROM_IMM_USE_STMT (use_iter);
2372               }
2373         }
2374       if ((*life)[i])
2375         continue;
2376
2377       stmt_info = vinfo_for_stmt (stmt);
2378       if (STMT_VINFO_DATA_REF (stmt_info))
2379         {
2380           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2381             stmt_cost = vect_get_stmt_cost (scalar_load);
2382           else
2383             stmt_cost = vect_get_stmt_cost (scalar_store);
2384         }
2385       else
2386         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2387
2388       scalar_cost += stmt_cost;
2389     }
2390
2391   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2392     if (child)
2393       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2394
2395   return scalar_cost;
2396 }
2397
2398 /* Check if vectorization of the basic block is profitable.  */
2399
2400 static bool
2401 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2402 {
2403   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2404   slp_instance instance;
2405   int i;
2406   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2407   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2408
2409   /* Calculate scalar cost.  */
2410   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2411     {
2412       auto_vec<bool, 20> life;
2413       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2414       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2415                                               SLP_INSTANCE_TREE (instance),
2416                                               &life);
2417     }
2418
2419   /* Complete the target-specific cost calculation.  */
2420   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2421                &vec_inside_cost, &vec_epilogue_cost);
2422
2423   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
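  /* Worked example with made-up costs: scalar_cost == 8 versus
     vec_inside_cost == 3, vec_prologue_cost == 2 and
     vec_epilogue_cost == 1 gives 3 + (2 + 1) == 6 < 8, so the
     vectorized basic block would be considered profitable.  */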
2424
2425   if (dump_enabled_p ())
2426     {
2427       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2428       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2429                    vec_inside_cost);
2430       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2431       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2432       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2433     }
2434
2435   /* Vectorization is profitable if its cost is less than the cost of scalar
2436      version.  */
2437   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2438     return false;
2439
2440   return true;
2441 }
2442
2443 /* Check if the basic block can be vectorized.  */
2444
2445 static bb_vec_info
2446 vect_slp_analyze_bb_1 (basic_block bb)
2447 {
2448   bb_vec_info bb_vinfo;
2449   vec<slp_instance> slp_instances;
2450   slp_instance instance;
2451   int i;
2452   int min_vf = 2;
2453   unsigned n_stmts = 0;
2454
2455   bb_vinfo = new_bb_vec_info (bb);
2456   if (!bb_vinfo)
2457     return NULL;
2458
2459   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
2460     {
2461       if (dump_enabled_p ())
2462         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2463                          "not vectorized: unhandled data-ref in basic "
2464                          "block.\n");
2465
2466       destroy_bb_vec_info (bb_vinfo);
2467       return NULL;
2468     }
2469
2470   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2471     {
2472       if (dump_enabled_p ())
2473         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474                          "not vectorized: not enough data-refs in "
2475                          "basic block.\n");
2476
2477       destroy_bb_vec_info (bb_vinfo);
2478       return NULL;
2479     }
2480
2481   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2482     {
2483       if (dump_enabled_p ())
2484         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2485                          "not vectorized: unhandled data access in "
2486                          "basic block.\n");
2487
2488       destroy_bb_vec_info (bb_vinfo);
2489       return NULL;
2490     }
2491
2492   vect_pattern_recog (NULL, bb_vinfo);
2493
2494   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2495     {
2496       if (dump_enabled_p ())
2497         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2498                          "not vectorized: bad data alignment in basic "
2499                          "block.\n");
2500
2501       destroy_bb_vec_info (bb_vinfo);
2502       return NULL;
2503     }
2504
2505   /* Check the SLP opportunities in the basic block, analyze and build SLP
2506      trees.  */
2507   if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
2508     {
2509       if (dump_enabled_p ())
2510         {
2511           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512                            "Failed to SLP the basic block.\n");
2513           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2514                            "not vectorized: failed to find SLP opportunities "
2515                            "in basic block.\n");
2516         }
2517
2518       destroy_bb_vec_info (bb_vinfo);
2519       return NULL;
2520     }
2521
2522   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2523
2524   /* Mark all the statements that we want to vectorize as pure SLP and
2525      relevant.  */
2526   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2527     {
2528       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2529       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2530     }
2531
2532   /* Mark all the statements that we do not want to vectorize.  */
2533   for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2534        !gsi_end_p (gsi); gsi_next (&gsi))
2535     {
2536       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2537       if (STMT_SLP_TYPE (vinfo) != pure_slp)
2538         STMT_VINFO_VECTORIZABLE (vinfo) = false;
2539     }
2540
2541   /* Analyze dependences.  At this point all stmts not participating in
2542      vectorization have to be marked.  Dependence analysis assumes
2543      that we either vectorize all SLP instances or none at all.  */
2544   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2545     {
2546       if (dump_enabled_p ())
2547         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2548                          "not vectorized: unhandled data dependence "
2549                          "in basic block.\n");
2550
2551       destroy_bb_vec_info (bb_vinfo);
2552       return NULL;
2553     }
2554
2555   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2556     {
2557       if (dump_enabled_p ())
2558         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2559                          "not vectorized: unsupported alignment in basic "
2560                          "block.\n");
2561       destroy_bb_vec_info (bb_vinfo);
2562       return NULL;
2563     }
2564
2565   if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2566                                     BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2567     {
2568       if (dump_enabled_p ())
2569         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2570                          "not vectorized: bad operation in basic block.\n");
2571
2572       destroy_bb_vec_info (bb_vinfo);
2573       return NULL;
2574     }
2575
2576   /* Cost model: check if the vectorization is worthwhile.  */
2577   if (!unlimited_cost_model (NULL)
2578       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2579     {
2580       if (dump_enabled_p ())
2581         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2582                          "not vectorized: vectorization is not "
2583                          "profitable.\n");
2584
2585       destroy_bb_vec_info (bb_vinfo);
2586       return NULL;
2587     }
2588
2589   if (dump_enabled_p ())
2590     dump_printf_loc (MSG_NOTE, vect_location,
2591                      "Basic block will be vectorized using SLP\n");
2592
2593   return bb_vinfo;
2594 }
2595
2596
2597 bb_vec_info
2598 vect_slp_analyze_bb (basic_block bb)
2599 {
2600   bb_vec_info bb_vinfo;
2601   int insns = 0;
2602   gimple_stmt_iterator gsi;
2603   unsigned int vector_sizes;
2604
2605   if (dump_enabled_p ())
2606     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2607
2608   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2609     {
2610       gimple stmt = gsi_stmt (gsi);
2611       if (!is_gimple_debug (stmt)
2612           && !gimple_nop_p (stmt)
2613           && gimple_code (stmt) != GIMPLE_LABEL)
2614         insns++;
2615     }
2616
2617   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2618     {
2619       if (dump_enabled_p ())
2620         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2621                          "not vectorized: too many instructions in "
2622                          "basic block.\n");
2623
2624       return NULL;
2625     }
2626
2627   /* Autodetect the first vector size we try.  */
2628   current_vector_size = 0;
2629   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2630
2631   while (1)
2632     {
2633       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2634       if (bb_vinfo)
2635         return bb_vinfo;
2636
2637       destroy_bb_vec_info (bb_vinfo);
2638
2639       vector_sizes &= ~current_vector_size;
2640       if (vector_sizes == 0
2641           || current_vector_size == 0)
2642         return NULL;
2643
2644       /* Try the next biggest vector size.  */
2645       current_vector_size = 1 << floor_log2 (vector_sizes);
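      /* Hypothetical walk-through: a target advertising 32- and
         16-byte vectors yields vector_sizes == 0x30; after a failed
         32-byte attempt the mask becomes 0x10 and
         1 << floor_log2 (0x10) retries with 16-byte vectors.  */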
2646       if (dump_enabled_p ())
2647         dump_printf_loc (MSG_NOTE, vect_location,
2648                          "***** Re-trying analysis with "
2649                          "vector size %d\n", current_vector_size);
2650     }
2651 }
2652
2653
2654 /* For constant and loop invariant defs of SLP_NODE this function returns
2655    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2656    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2657    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2658    REDUC_INDEX is the index of the reduction operand in the statements, unless
2659    it is -1.  */
2660
2661 static void
2662 vect_get_constant_vectors (tree op, slp_tree slp_node,
2663                            vec<tree> *vec_oprnds,
2664                            unsigned int op_num, unsigned int number_of_vectors,
2665                            int reduc_index)
2666 {
2667   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2668   gimple stmt = stmts[0];
2669   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2670   unsigned nunits;
2671   tree vec_cst;
2672   tree *elts;
2673   unsigned j, number_of_places_left_in_vector;
2674   tree vector_type;
2675   tree vop;
2676   int group_size = stmts.length ();
2677   unsigned int vec_num, i;
2678   unsigned number_of_copies = 1;
2679   vec<tree> voprnds;
2680   voprnds.create (number_of_vectors);
2681   bool constant_p, is_store;
2682   tree neutral_op = NULL;
2683   enum tree_code code = gimple_expr_code (stmt);
2684   gimple def_stmt;
2685   struct loop *loop;
2686   gimple_seq ctor_seq = NULL;
2687
2688   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2689   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2690
2691   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2692       && reduc_index != -1)
2693     {
2694       op_num = reduc_index;
2695       op = gimple_op (stmt, op_num + 1);
2696       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2697          we need either neutral operands or the original operands.  See
2698          get_initial_def_for_reduction() for details.  */
2699       switch (code)
2700         {
2701           case WIDEN_SUM_EXPR:
2702           case DOT_PROD_EXPR:
2703           case SAD_EXPR:
2704           case PLUS_EXPR:
2705           case MINUS_EXPR:
2706           case BIT_IOR_EXPR:
2707           case BIT_XOR_EXPR:
2708              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2709                neutral_op = build_real (TREE_TYPE (op), dconst0);
2710              else
2711                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2712
2713              break;
2714
2715           case MULT_EXPR:
2716              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2717                neutral_op = build_real (TREE_TYPE (op), dconst1);
2718              else
2719                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2720
2721              break;
2722
2723           case BIT_AND_EXPR:
2724             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2725             break;
2726
2727           /* For MIN/MAX we don't have an easy neutral operand but
2728              the initial values can be used fine here.  Only for
2729              a reduction chain do we have to force a neutral element.  */
2730           case MAX_EXPR:
2731           case MIN_EXPR:
2732             if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2733               neutral_op = NULL;
2734             else
2735               {
2736                 def_stmt = SSA_NAME_DEF_STMT (op);
2737                 loop = (gimple_bb (stmt))->loop_father;
2738                 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2739                                                     loop_preheader_edge (loop));
2740               }
2741             break;
2742
2743           default:
2744             gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2745             neutral_op = NULL;
2746         }
2747     }
2748
2749   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2750     {
2751       is_store = true;
2752       op = gimple_assign_rhs1 (stmt);
2753     }
2754   else
2755     is_store = false;
2756
2757   gcc_assert (op);
2758
2759   if (CONSTANT_CLASS_P (op))
2760     constant_p = true;
2761   else
2762     constant_p = false;
2763
2764   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2765      created vectors. It is greater than 1 if unrolling is performed.
2766
2767      For example, we have two scalar operands, s1 and s2 (e.g., group of
2768      strided accesses of size two), while NUNITS is four (i.e., four scalars
2769      of this type can be packed in a vector).  The output vector will contain
2770      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2771      will be 2).
2772
2773      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2774      containing the operands.
2775
2776      For example, NUNITS is four as before, and the group size is 8
2777      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2778      {s5, s6, s7, s8}.  */
2779
2780   number_of_copies = nunits * number_of_vectors / group_size;
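  /* Sanity check against the example above: nunits == 4,
     group_size == 2 and one vector to create give
     number_of_copies == 4 * 1 / 2 == 2, matching {s1, s2, s1, s2}.  */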
2781
2782   number_of_places_left_in_vector = nunits;
2783   elts = XALLOCAVEC (tree, nunits);
2784   bool place_after_defs = false;
2785   for (j = 0; j < number_of_copies; j++)
2786     {
2787       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2788         {
2789           if (is_store)
2790             op = gimple_assign_rhs1 (stmt);
2791           else
2792             {
2793               switch (code)
2794                 {
2795                   case COND_EXPR:
2796                     if (op_num == 0 || op_num == 1)
2797                       {
2798                         tree cond = gimple_assign_rhs1 (stmt);
2799                         op = TREE_OPERAND (cond, op_num);
2800                       }
2801                     else
2802                       {
2803                         if (op_num == 2)
2804                           op = gimple_assign_rhs2 (stmt);
2805                         else
2806                           op = gimple_assign_rhs3 (stmt);
2807                       }
2808                     break;
2809
2810                   case CALL_EXPR:
2811                     op = gimple_call_arg (stmt, op_num);
2812                     break;
2813
2814                   case LSHIFT_EXPR:
2815                   case RSHIFT_EXPR:
2816                   case LROTATE_EXPR:
2817                   case RROTATE_EXPR:
2818                     op = gimple_op (stmt, op_num + 1);
2819                     /* Unlike the other binary operators, shifts/rotates
2820                        take an int shift count rather than one of the same
2821                        type as the lhs, so make sure the scalar is the
2822                        right type if we are dealing with vectors of
2823                        long long/long/short/char.  */
2824                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2825                       op = fold_convert (TREE_TYPE (vector_type), op);
2826                     break;
2827
2828                   default:
2829                     op = gimple_op (stmt, op_num + 1);
2830                     break;
2831                 }
2832             }
2833
2834           if (reduc_index != -1)
2835             {
2836               loop = (gimple_bb (stmt))->loop_father;
2837               def_stmt = SSA_NAME_DEF_STMT (op);
2838
2839               gcc_assert (loop);
2840
2841               /* Get the def before the loop.  In a reduction chain we have only
2842                  one initial value.  */
2843               if ((j != (number_of_copies - 1)
2844                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2845                        && i != 0))
2846                   && neutral_op)
2847                 op = neutral_op;
2848               else
2849                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2850                                             loop_preheader_edge (loop));
2851             }
2852
2853           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2854           number_of_places_left_in_vector--;
2855           tree orig_op = op;
2856           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2857             {
2858               if (CONSTANT_CLASS_P (op))
2859                 {
2860                   op = fold_unary (VIEW_CONVERT_EXPR,
2861                                    TREE_TYPE (vector_type), op);
2862                   gcc_assert (op && CONSTANT_CLASS_P (op));
2863                 }
2864               else
2865                 {
2866                   tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
2867                   gimple init_stmt;
2868                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
2869                   init_stmt
2870                     = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
2871                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2872                   op = new_temp;
2873                 }
2874             }
2875           elts[number_of_places_left_in_vector] = op;
2876           if (!CONSTANT_CLASS_P (op))
2877             constant_p = false;
2878           if (TREE_CODE (orig_op) == SSA_NAME
2879               && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2880               && STMT_VINFO_BB_VINFO (stmt_vinfo)
2881               && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2882                   == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2883             place_after_defs = true;
2884
2885           if (number_of_places_left_in_vector == 0)
2886             {
2887               number_of_places_left_in_vector = nunits;
2888
2889               if (constant_p)
2890                 vec_cst = build_vector (vector_type, elts);
2891               else
2892                 {
2893                   vec<constructor_elt, va_gc> *v;
2894                   unsigned k;
2895                   vec_alloc (v, nunits);
2896                   for (k = 0; k < nunits; ++k)
2897                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2898                   vec_cst = build_constructor (vector_type, v);
2899                 }
2900               tree init;
2901               gimple_stmt_iterator gsi;
2902               if (place_after_defs)
2903                 {
2904                   gsi = gsi_for_stmt
2905                           (vect_find_last_scalar_stmt_in_slp (slp_node));
2906                   init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2907                 }
2908               else
2909                 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
2910               if (ctor_seq != NULL)
2911                 {
2912                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
2913                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2914                                                         GSI_SAME_STMT);
2915                   ctor_seq = NULL;
2916                 }
2917               voprnds.quick_push (init);
2918               place_after_defs = false;
2919             }
2920         }
2921     }
2922
2923   /* Since the vectors are created in the reverse order, reverse them here
2924      so that VEC_OPRNDS ends up in the original order.  */
2925   vec_num = voprnds.length ();
2926   for (j = vec_num; j != 0; j--)
2927     {
2928       vop = voprnds[j - 1];
2929       vec_oprnds->quick_push (vop);
2930     }
2931
2932   voprnds.release ();
2933
2934   /* In case that VF is greater than the unrolling factor needed for the SLP
2935      group of stmts, NUMBER_OF_VECTORS to be created is greater than
2936      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2937      to replicate the vectors.  */
2938   while (number_of_vectors > vec_oprnds->length ())
2939     {
2940       tree neutral_vec = NULL;
2941
2942       if (neutral_op)
2943         {
2944           if (!neutral_vec)
2945             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2946
2947           vec_oprnds->quick_push (neutral_vec);
2948         }
2949       else
2950         {
2951           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2952             vec_oprnds->quick_push (vop);
2953         }
2954     }
2955 }
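
/* Illustration (a hypothetical example, not part of the original source):
   suppose two invariant vectors v0 and v1 were built above but
   NUMBER_OF_VECTORS is 4 because the loop VF is twice the SLP unrolling
   factor.  The replication loop then pushes v0 and v1 again, yielding
   {v0, v1, v0, v1}.  For a reduction with a neutral operand the missing
   slots are instead filled with the neutral vector built once by
   build_vector_from_val.  */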


/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}
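
/* Illustration (not part of the original source): if SLP_NODE already holds
   the vectorized stmts
     vect_x_1 = ...;
     vect_x_2 = ...;
   the loop above pushes the SSA names vect_x_1 and vect_x_2 onto VEC_OPRNDS
   in that order.  */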


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds, int reduc_index)
{
  gimple first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
         the same child node is checked against the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (child)
            {
              gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
              gimple related
                = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

              if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
                  || (related
                      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  child_index++;
                }
            }
          else
            child_index++;
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects, reduc_index);

      vec_oprnds->quick_push (vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
        return;
    }
}
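
/* Illustration (a hypothetical example, not part of the original source):
   for a narrowing group such as 'short_s = (short) int_i', the LHS unit is
   2 bytes while the smallest RHS scalar type is 4 bytes, so a
   NUMBER_OF_VECTS of 1 computed from the LHS is scaled above to
   1 * 4 / 2 == 2 vectors of RHS operands.  */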


/* Create NCOPIES permutation statements using the vector mask MASK and two
   input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for
   the first copy, shifting by STRIDE elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number of
   copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   where the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, vec<tree> dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = SLP_TREE_VEC_STMTS (node).length ();
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = dr_chain[first_vec_indx];
      second_vec = dr_chain[second_vec_indx];

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                       first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
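
/* Illustration (a hypothetical example, not part of the original source):
   with SLP_TREE_NUMBER_OF_VEC_STMTS == 4 and NCOPIES == 2, STRIDE is 2, so
   a call with VECT_STMTS_COUNTER == 0 fills slots 0 and 2 of
   SLP_TREE_VEC_STMTS with
     vect_perm_0 = VEC_PERM_EXPR <dr_chain[f], dr_chain[s], mask>;
     vect_perm_1 = VEC_PERM_EXPR <dr_chain[f + 2], dr_chain[s + 2], mask>;
   where f and s stand for FIRST_VEC_INDX and SECOND_VEC_INDX.  */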


/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Set *NEED_NEXT_VECTOR to TRUE if the permutation requires moving to the
   next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       unsigned char *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < 0)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "permutation requires past vector ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "permutation requires at least two vectors ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* The mask requires the next vector.  */
  while (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "permutation requires at "
                               "least three vectors ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
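
/* Illustration (not part of the original source): with MASK_NUNITS == 4, a
   mask element of 9 is >= 2 * 4, so the loop above drops the first input
   vector: the element becomes 9 - 4 == 5, previously recorded mask entries
   are lowered by 4 as well (6 -> 2), and *MASK_FIXED is set so the caller
   switches to the second and third input vectors.  This is exactly the
   {6,9,9,9} -> {2,5,5,5} conversion described in
   vect_transform_slp_perm_load below.  */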


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, current_mask_element, ncopies;
  unsigned char *mask;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num;
  int vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  machine_mode mode;

  mode = TYPE_MODE (vectype);

  if (!can_vec_perm_p (mode, false, NULL))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vect permute for ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
                (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on the
     SLP_NODE_INSTANCE unrolling factor.  */
  orig_vec_stmts_num
    = group_size * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* The number of copies is determined by the final vectorization factor
     relative to the SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
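
  /* Illustration (a hypothetical example, not part of the original source):
     with GROUP_SIZE == 3, NUNITS == 4 and an SLP unrolling factor of 4,
     ORIG_VEC_STMTS_NUM is 3 * 4 / 4 == 3, i.e. three input vectors are
     permuted per unrolled iteration; if the final VF is 8, NCOPIES is
     8 / 4 == 2 and each permutation is emitted twice, STRIDE elements of
     DR_CHAIN apart.  */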

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  scalar_index = 0;
  index = 0;
  vect_stmts_counter = 0;
  vec_index = 0;
  first_vec_index = vec_index++;
  if (only_one_vec)
    second_vec_index = first_vec_index;
  else
    second_vec_index = vec_index++;

  for (j = 0; j < unroll_factor; j++)
    {
      for (k = 0; k < group_size; k++)
        {
          i = SLP_TREE_LOAD_PERMUTATION (node)[k];
          first_mask_element = i + j * group_size;
          if (!vect_get_mask_element (stmt, first_mask_element, 0,
                                      nunits, only_one_vec, index,
                                      mask, &current_mask_element,
                                      &need_next_vector,
                                      &number_of_mask_fixes, &mask_fixed,
                                      &needs_first_vector))
            return false;
          gcc_assert (current_mask_element >= 0
                      && current_mask_element < 2 * nunits);
          mask[index++] = current_mask_element;

          if (index == nunits)
            {
              index = 0;
              if (!can_vec_perm_p (mode, false, mask))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  return false;
                }

              if (!analyze_only)
                {
                  int l;
                  tree mask_vec, *mask_elts;
                  mask_elts = XALLOCAVEC (tree, nunits);
                  for (l = 0; l < nunits; ++l)
                    mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
                  mask_vec = build_vector (mask_type, mask_elts);

                  if (need_next_vector)
                    {
                      first_vec_index = second_vec_index;
                      second_vec_index = vec_index;
                    }

                  next_scalar_stmt
                    = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];

                  vect_create_mask_and_perm (stmt, next_scalar_stmt,
                                             mask_vec, first_vec_index,
                                             second_vec_index, gsi, node,
                                             vectype, dr_chain, ncopies,
                                             vect_stmts_counter++);
                }
            }
        }
    }

  return true;
}


/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree child;

  if (!node)
    return false;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF and divided by the
     vector size.
     Unless this is an SLP reduction, in which case the number of vector
     stmts is equal to the number of vector stmts of the children.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    vec_stmts_size = (vectorization_factor * group_size) / nunits;

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
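  /* Illustration (a hypothetical example, not part of the original source):
     for a group like
       a[0] = b[0] + c[0];
       a[1] = b[1] - c[1];
     the node is vectorized once as PLUS_EXPR and once as MINUS_EXPR; with
     V4SI vectors the merge below builds the selector {0,5,2,7}, taking the
     elements whose MASK entry is 0 from the PLUS result and the others from
     the MINUS result.  */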
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple ostmt;
      unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
      bool allsame = true;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask[i] = 1;
            allsame = false;
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask[i] = 0;
      if (!allsame)
        {
          vec<gimple> v0;
          vec<gimple> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
              for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  melts[l] = build_int_cst
                      (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
                }
              tmask = build_vector (mvectype, melts);

              /* ???  Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets'
                 vector addsub instructions.  */
              gimple vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple stmt, new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
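
/* Illustration (a hypothetical example, not part of the original source):
   a pure-SLP scalar call
     x_1 = sqrtf (a_1);
   is rewritten above to the dead assignment
     x_1 = 0.0f;
   instead of being deleted outright, since other SLP instances may still
   refer to the stmt; the dead assignment is left for later DCE to remove.  */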

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (loop_vinfo)
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "------>SLPing statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}