/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if one is found and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
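/* For instance (a hypothetical chain, not taken from this file): if the
   interleaving chain rooted at FIRST_STMT covers the loads of a[0], a[1],
   a[2] and a[3] in that order, then calling this function for the load of
   a[2] yields 2, while a load not linked into the chain yields -1.  */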

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */
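/* As a sketch (hypothetical stmts, not from this file): for the group
     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];
   operand 0 of both stmts is defined by loads of b[] and operand 1 by
   loads of c[]; the defs of the second stmt must agree in definition type
   and type compatibility with those recorded for the first stmt.  */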

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types ");

              return false;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}


/* Verify whether the scalar stmts STMTS are isomorphic, do not require
   data permutation, and are not of unsupported types of operation.
   Return true if so; otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates that the comparison could not be
   carried out or that the stmts will never be vectorized by SLP.  */
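/* A sketch of the MATCHES protocol (hypothetical group of four stmts):
   if stmts 0, 1 and 3 use the same operation but stmt 2 differs, the
   function returns false with MATCHES = { true, true, false, true },
   letting the caller split the group; MATCHES[0] == false instead means
   the whole group must be given up.  */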

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
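          /* E.g. (hypothetical stmts): x0 = a0 << 3 and x1 = a1 << 3 can
             share a vector/scalar shift, but x0 = a0 << 3 and x1 = a1 << 5
             cannot; the latter needs a vector/vector shift.  */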
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Return FALSE if the def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation; in that case
   *MATCHES records which stmts failed to match.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes)
{
  unsigned nops, i, this_npermutes = 0;
  gimple stmt;

  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                        stmt, (i == 0), &oprnds_info))
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }
    }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches, npermutes))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build for operand zero failed and operands zero
         and one can be commuted, try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of unsuccessful permutes was not
             more than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          /* Swap mismatched definition stmts.  */
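          /* For instance (hypothetical stmts): given
               a[0] = b[0] + c[0];
               a[1] = c[1] + b[1];
             the defs of the second stmt mismatch operand-wise, but since
             PLUS_EXPR is commutative we can swap the two defs of stmt 1
             and retry the build.  */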
          for (unsigned j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
              }
          /* And try again ... */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   matches, npermutes))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */
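/* A sketch of the convention used below (hypothetical values): with
   PERMUTATION = { 2, 0, 1 }, the stmt at index i is moved to slot
   PERMUTATION[i], so stmts { s0, s1, s2 } become { s1, s2, s0 }.  */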

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (i = 0; i < group_size; ++i)
            dump_printf (MSG_NOTE, "%d ", i);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */
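  /* For example (hypothetical): in a reduction summing a[2*i] and a[2*i+1],
     a load node may use the permutation { 1, 0 }; that is acceptable
     because the reduction result does not depend on the order, and the
     whole instance is simply rearranged to match the loads.  */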

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
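  /* E.g. (hypothetical chain): if the interleaving chain is
     a[0] -> a[1] -> a[2] -> a[3], a node whose loads are { a[1], a[2] }
     is a valid subchain; { a[2], a[0] } is not, as the loads must follow
     the chain order without reordering.  */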
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's a reduction).  */
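  /* E.g. (hypothetical, GROUP_SIZE == 2): permutations { 0, 0 } for one
     load node and { 1, 1 } for the other are supported, as each node
     repeats a single element of the chain GROUP_SIZE times; a mixed
     permutation such as { 0, 1 } in one node is rejected here.  */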
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */
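/* For instance (hypothetical nodes): with load nodes { a[1], a[3] } and
   { a[0], a[2] }, the earliest load in the function is that of a[0],
   which lives in the second node; the generated loads must be inserted
   before it, not before the load of a[1].  */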

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ???  Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
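  /* Worked example (hypothetical numbers): with NUNITS == 4 and
     GROUP_SIZE == 6, least_common_multiple (4, 6) == 12, so
     ncopies_for_cost == 12 / 4 == 3 vector stmts.  */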
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                           instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
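  /* Worked example (hypothetical numbers): with NUNITS == 4 and
     GROUP_SIZE == 2, least_common_multiple (4, 2) == 4 and the
     unrolling factor is 4 / 2 == 2, i.e. two group copies fill one
     vector; basic block SLP cannot unroll, hence the check below.  */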
1465   if (unrolling_factor != 1 && !loop_vinfo)
1466     {
1467       if (dump_enabled_p ())
1468         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1469                          "Build SLP failed: unrolling required in basic"
1470                          " block SLP");
1471
1472       return false;
1473     }
1474
1475   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1476   scalar_stmts.create (group_size);
1477   next = stmt;
1478   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1479     {
1480       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1481       while (next)
1482         {
1483           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1484               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1485             scalar_stmts.safe_push (
1486                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1487           else
1488             scalar_stmts.safe_push (next);
1489           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1490         }
1491     }
1492   else
1493     {
1494       /* Collect reduction statements.  */
1495       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1496       for (i = 0; reductions.iterate (i, &next); i++)
1497         scalar_stmts.safe_push (next);
1498     }
1499
1500   node = vect_create_new_slp_node (scalar_stmts);
1501
1502   loads.create (group_size);
1503
1504   /* Build the tree for the SLP instance.  */
1505   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1506                            &max_nunits, &loads,
1507                            vectorization_factor, NULL, NULL))
1508     {
1509       /* Calculate the unrolling factor based on the smallest type.  */
1510       if (max_nunits > nunits)
1511         unrolling_factor = least_common_multiple (max_nunits, group_size)
1512                            / group_size;
1513
1514       if (unrolling_factor != 1 && !loop_vinfo)
1515         {
1516           if (dump_enabled_p ())
1517             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1518                              "Build SLP failed: unrolling required in basic"
1519                              " block SLP");
1520           vect_free_slp_tree (node);
1521           loads.release ();
1522           return false;
1523         }
1524
1525       /* Create a new SLP instance.  */
1526       new_instance = XNEW (struct _slp_instance);
1527       SLP_INSTANCE_TREE (new_instance) = node;
1528       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1529       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1530       SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1531       SLP_INSTANCE_LOADS (new_instance) = loads;
1532       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1533
1534       /* Compute the load permutation.  */
1535       slp_tree load_node;
1536       bool loads_permuted = false;
1537       FOR_EACH_VEC_ELT (loads, i, load_node)
1538         {
1539           vec<unsigned> load_permutation;
1540           int j;
1541           gimple load, first_stmt;
1542           bool this_load_permuted = false;
1543           load_permutation.create (group_size);
1544           first_stmt = GROUP_FIRST_ELEMENT
1545               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1546           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1547             {
1548               int load_place
1549                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1550               gcc_assert (load_place != -1);
1551               if (load_place != j)
1552                 this_load_permuted = true;
1553               load_permutation.safe_push (load_place);
1554             }
1555           if (!this_load_permuted)
1556             {
1557               load_permutation.release ();
1558               continue;
1559             }
1560           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1561           loads_permuted = true;
1562         }
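           /* For example, if the scalar loads of a node occupy positions
              2, 0, 1 of their interleaving chain, the permutation recorded
              above is {2, 0, 1}; a node whose loads already appear in
              chain order gets no permutation vector at all.  */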
1563
1564       if (loads_permuted)
1565         {
1566           if (!vect_supported_load_permutation_p (new_instance))
1567             {
1568               if (dump_enabled_p ())
1569                 {
1570                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1571                                    "Build SLP failed: unsupported load "
1572                                    "permutation ");
1573                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1574                 }
1575               vect_free_slp_instance (new_instance);
1576               return false;
1577             }
1578
1579           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1580             = vect_find_first_load_in_slp_instance (new_instance);
1581         }
1582
1583       /* Compute the costs of this SLP instance.  */
1584       vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1585                              new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1586
1587       if (loop_vinfo)
1588         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1589       else
1590         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1591
1592       if (dump_enabled_p ())
1593         vect_print_slp_tree (MSG_NOTE, node);
1594
1595       return true;
1596     }
1597
1598   /* Failed to SLP.  */
1599   /* Free the allocated memory.  */
1600   vect_free_slp_tree (node);
1601   loads.release ();
1602
1603   return false;
1604 }
1605
1606
1607 /* Check if there are stmts in the loop that can be vectorized using SLP.
1608    Build SLP trees of packed scalar stmts if SLP is possible.  */
1609
1610 bool
1611 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1612 {
1613   unsigned int i;
1614   vec<gimple> grouped_stores;
1615   vec<gimple> reductions = vNULL;
1616   vec<gimple> reduc_chains = vNULL;
1617   gimple first_element;
1618   bool ok = false;
1619
1620   if (dump_enabled_p ())
1621     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
1622
1623   if (loop_vinfo)
1624     {
1625       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1626       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1627       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1628     }
1629   else
1630     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1631
1632   /* Find SLP sequences starting from groups of grouped stores.  */
1633   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1634     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1635       ok = true;
1636
1637   if (bb_vinfo && !ok)
1638     {
1639       if (dump_enabled_p ())
1640         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1641                          "Failed to SLP the basic block.");
1642
1643       return false;
1644     }
1645
1646   if (loop_vinfo
1647       && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1648     {
1649       /* Find SLP sequences starting from reduction chains.  */
1650       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1651         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1652           ok = true;
1653         else
1654           return false;
1655
1656      /* Don't try to vectorize SLP reductions if a reduction chain was
1657          detected.  */
1658       return ok;
1659     }
1660
1661   /* Find SLP sequences starting from groups of reductions.  */
1662   if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1663       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1664     ok = true;
1665
1666   return true;
1667 }
1668
1669
1670 /* For each possible SLP instance decide whether to SLP it and calculate overall
1671    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
1672    least one instance.  */
1673
1674 bool
1675 vect_make_slp_decision (loop_vec_info loop_vinfo)
1676 {
1677   unsigned int i, unrolling_factor = 1;
1678   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1679   slp_instance instance;
1680   int decided_to_slp = 0;
1681
1682   if (dump_enabled_p ())
1683     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1684
1685   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1686     {
1687       /* FORNOW: SLP if you can.  */
1688       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1689         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1690
1691       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1692          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1693          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1694       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1695       decided_to_slp++;
1696     }
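       /* The loop above keeps the maximum, so instances with unrolling
          factors of, say, 1, 2 and 4 result in an overall SLP unrolling
          factor of 4.  */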
1697
1698   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1699
1700   if (decided_to_slp && dump_enabled_p ())
1701     dump_printf_loc (MSG_NOTE, vect_location,
1702                      "Decided to SLP %d instances. Unrolling factor %d",
1703                      decided_to_slp, unrolling_factor);
1704
1705   return (decided_to_slp > 0);
1706 }
1707
1708
1709 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1710    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
1711
1712 static void
1713 vect_detect_hybrid_slp_stmts (slp_tree node)
1714 {
1715   int i;
1716   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1717   gimple stmt = stmts[0];
1718   imm_use_iterator imm_iter;
1719   gimple use_stmt;
1720   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1721   slp_tree child;
1722   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1723   struct loop *loop = NULL;
1724   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1725   basic_block bb = NULL;
1726
1727   if (!node)
1728     return;
1729
1730   if (loop_vinfo)
1731     loop = LOOP_VINFO_LOOP (loop_vinfo);
1732   else
1733     bb = BB_VINFO_BB (bb_vinfo);
1734
1735   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1736     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1737         && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1738       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1739         if (gimple_bb (use_stmt)
1740             && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1741                  || bb == gimple_bb (use_stmt))
1742             && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1743             && !STMT_SLP_TYPE (stmt_vinfo)
1744             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1745                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1746             && !(gimple_code (use_stmt) == GIMPLE_PHI
1747                  && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1748                   == vect_reduction_def))
1749           vect_mark_slp_stmts (node, hybrid, i);
1750
1751   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1752     vect_detect_hybrid_slp_stmts (child);
1753 }
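
     /* A purely illustrative scenario for the walk above: if an SLP store
        group is fed by a computation whose result is also used by a
        statement outside any SLP instance (say a non-SLP reduction in the
        same loop), that computation is marked hybrid and is vectorized
        both as part of the SLP tree and by the loop-based scheme.  */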
1754
1755
1756 /* Find stmts that must be both vectorized and SLPed.  */
1757
1758 void
1759 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1760 {
1761   unsigned int i;
1762   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1763   slp_instance instance;
1764
1765   if (dump_enabled_p ())
1766     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1767
1768   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1769     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1770 }
1771
1772
1773 /* Create and initialize a new bb_vec_info struct for BB, as well as
1774    stmt_vec_info structs for all the stmts in it.  */
1775
1776 static bb_vec_info
1777 new_bb_vec_info (basic_block bb)
1778 {
1779   bb_vec_info res = NULL;
1780   gimple_stmt_iterator gsi;
1781
1782   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1783   BB_VINFO_BB (res) = bb;
1784
1785   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1786     {
1787       gimple stmt = gsi_stmt (gsi);
1788       gimple_set_uid (stmt, 0);
1789       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1790     }
1791
1792   BB_VINFO_GROUPED_STORES (res).create (10);
1793   BB_VINFO_SLP_INSTANCES (res).create (2);
1794   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1795
1796   bb->aux = res;
1797   return res;
1798 }
1799
1800
1801 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1802    stmts in the basic block.  */
1803
1804 static void
1805 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1806 {
1807   vec<slp_instance> slp_instances;
1808   slp_instance instance;
1809   basic_block bb;
1810   gimple_stmt_iterator si;
1811   unsigned i;
1812
1813   if (!bb_vinfo)
1814     return;
1815
1816   bb = BB_VINFO_BB (bb_vinfo);
1817
1818   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1819     {
1820       gimple stmt = gsi_stmt (si);
1821       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1822
1823       if (stmt_info)
1824         /* Free stmt_vec_info.  */
1825         free_stmt_vec_info (stmt);
1826     }
1827
1828   free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1829   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1830   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1831   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1832   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1833     vect_free_slp_instance (instance);
1834   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1835   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1836   free (bb_vinfo);
1837   bb->aux = NULL;
1838 }
1839
1840
1841 /* Analyze statements contained in SLP tree node after recursively analyzing
1842    the subtree.  Return TRUE if the operations are supported.  */
1843
1844 static bool
1845 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1846 {
1847   bool dummy;
1848   int i;
1849   gimple stmt;
1850   slp_tree child;
1851
1852   if (!node)
1853     return true;
1854
1855   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1856     if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1857       return false;
1858
1859   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1860     {
1861       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1862       gcc_assert (stmt_info);
1863       gcc_assert (PURE_SLP_STMT (stmt_info));
1864
1865       if (!vect_analyze_stmt (stmt, &dummy, node))
1866         return false;
1867     }
1868
1869   return true;
1870 }
1871
1872
1873 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
1874    operations are supported.  */
1875
1876 static bool
1877 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1878 {
1879   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1880   slp_instance instance;
1881   int i;
1882
1883   for (i = 0; slp_instances.iterate (i, &instance); )
1884     {
1885       if (!vect_slp_analyze_node_operations (bb_vinfo,
1886                                              SLP_INSTANCE_TREE (instance)))
1887         {
1888           vect_free_slp_instance (instance);
1889           slp_instances.ordered_remove (i);
1890         }
1891       else
1892         i++;
1893     }
1894
1895   if (!slp_instances.length ())
1896     return false;
1897
1898   return true;
1899 }
1900
1901
1902 /* Compute the scalar cost of the SLP node NODE and its children
1903    and return it.  Do not account for defs that are marked in LIFE,
1904    and update LIFE according to the uses of NODE.  */
1905
1906 static unsigned
1907 vect_bb_slp_scalar_cost (basic_block bb,
1908                          slp_tree node, vec<bool, va_stack> life)
1909 {
1910   unsigned scalar_cost = 0;
1911   unsigned i;
1912   gimple stmt;
1913   slp_tree child;
1914
1915   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1916     {
1917       unsigned stmt_cost;
1918       ssa_op_iter op_iter;
1919       def_operand_p def_p;
1920       stmt_vec_info stmt_info;
1921
1922       if (life[i])
1923         continue;
1924
1925       /* If there is a non-vectorized use of the defs then the scalar
1926          stmt is kept live in which case we do not account it or any
1927          required defs in the SLP children in the scalar cost.  This
1928          way we make the vectorization more costly when compared to
1929          the scalar cost.  */
1930       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
1931         {
1932           imm_use_iterator use_iter;
1933           gimple use_stmt;
1934           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
1935             if (gimple_bb (use_stmt) != bb
1936                 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
1937               {
1938                 life[i] = true;
1939                 BREAK_FROM_IMM_USE_STMT (use_iter);
1940               }
1941         }
1942       if (life[i])
1943         continue;
1944
1945       stmt_info = vinfo_for_stmt (stmt);
1946       if (STMT_VINFO_DATA_REF (stmt_info))
1947         {
1948           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1949             stmt_cost = vect_get_stmt_cost (scalar_load);
1950           else
1951             stmt_cost = vect_get_stmt_cost (scalar_store);
1952         }
1953       else
1954         stmt_cost = vect_get_stmt_cost (scalar_stmt);
1955
1956       scalar_cost += stmt_cost;
1957     }
1958
1959   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1960     scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
1961
1962   return scalar_cost;
1963 }
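
     /* A minimal standalone sketch (not GCC code, all names hypothetical)
        of the recursion above: a node contributes one unit of cost per
        scalar stmt that is not kept live by an outside use, plus the cost
        of its children.  A flat cost of 1 stands in for
        vect_get_stmt_cost.  */
     #if 0
     struct toy_node
     {
       unsigned nstmts;           /* Number of scalar stmts in the node.  */
       bool *live;                /* live[i]: stmt i has an outside use.  */
       unsigned nchildren;
       struct toy_node **children;
     };

     static unsigned
     toy_scalar_cost (struct toy_node *node)
     {
       unsigned cost = 0, i;
       for (i = 0; i < node->nstmts; i++)
         if (!node->live[i])
           cost += 1;
       for (i = 0; i < node->nchildren; i++)
         cost += toy_scalar_cost (node->children[i]);
       return cost;
     }
     #endif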
1964
1965 /* Check if vectorization of the basic block is profitable.  */
1966
1967 static bool
1968 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1969 {
1970   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1971   slp_instance instance;
1972   int i, j;
1973   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
1974   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
1975   void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
1976   stmt_vec_info stmt_info = NULL;
1977   stmt_vector_for_cost body_cost_vec;
1978   stmt_info_for_cost *ci;
1979
1980   /* Calculate vector costs.  */
1981   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1982     {
1983       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
1984
1985       FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
1986         {
1987           stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
1988           (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
1989                                 stmt_info, ci->misalign, vect_body);
1990         }
1991     }
1992
1993   /* Calculate scalar cost.  */
1994   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1995     {
1996       vec<bool, va_stack> life;
1997       vec_stack_alloc (bool, life, SLP_INSTANCE_GROUP_SIZE (instance));
1998       life.quick_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
1999       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2000                                               SLP_INSTANCE_TREE (instance),
2001                                               life);
2002       life.release ();
2003     }
2004
2005   /* Complete the target-specific cost calculation.  */
2006   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2007                &vec_inside_cost, &vec_epilogue_cost);
2008
2009   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
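       /* For instance, a prologue cost of 2, an epilogue cost of 1 and an
          inside cost of 10 give a total vector cost of 13; since 13 is
          less than a scalar cost of, say, 16, the check below deems the
          block profitable to vectorize.  */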
2010
2011   if (dump_enabled_p ())
2012     {
2013       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2014       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2015                    vec_inside_cost);
2016       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2017       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2018       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d", scalar_cost);
2019     }
2020
2021   /* Vectorization is profitable if its cost is less than the cost of scalar
2022      version.  */
2023   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2024     return false;
2025
2026   return true;
2027 }
2028
2029 /* Check if the basic block can be vectorized.  */
2030
2031 static bb_vec_info
2032 vect_slp_analyze_bb_1 (basic_block bb)
2033 {
2034   bb_vec_info bb_vinfo;
2035   vec<slp_instance> slp_instances;
2036   slp_instance instance;
2037   int i;
2038   int min_vf = 2;
2039
2040   bb_vinfo = new_bb_vec_info (bb);
2041   if (!bb_vinfo)
2042     return NULL;
2043
2044   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2045     {
2046       if (dump_enabled_p ())
2047         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2048                          "not vectorized: unhandled data-ref in basic "
2049                          "block.\n");
2050
2051       destroy_bb_vec_info (bb_vinfo);
2052       return NULL;
2053     }
2054
2055   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2056     {
2057       if (dump_enabled_p ())
2058         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2059                          "not vectorized: not enough data-refs in "
2060                          "basic block.\n");
2061
2062       destroy_bb_vec_info (bb_vinfo);
2063       return NULL;
2064     }
2065
2066   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2067     {
2068       if (dump_enabled_p ())
2069         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2070                          "not vectorized: unhandled data access in "
2071                          "basic block.\n");
2072
2073       destroy_bb_vec_info (bb_vinfo);
2074       return NULL;
2075     }
2076
2077   vect_pattern_recog (NULL, bb_vinfo);
2078
2079   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2080     {
2081       if (dump_enabled_p ())
2082         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2083                          "not vectorized: unhandled data dependence "
2084                          "in basic block.\n");
2085
2086       destroy_bb_vec_info (bb_vinfo);
2087       return NULL;
2088     }
2089
2090   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2091     {
2092       if (dump_enabled_p ())
2093         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2094                          "not vectorized: bad data alignment in basic "
2095                          "block.\n");
2096
2097       destroy_bb_vec_info (bb_vinfo);
2098       return NULL;
2099     }
2100
2101   /* Check the SLP opportunities in the basic block, analyze and build SLP
2102      trees.  */
2103   if (!vect_analyze_slp (NULL, bb_vinfo))
2104     {
2105       if (dump_enabled_p ())
2106         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2107                          "not vectorized: failed to find SLP opportunities "
2108                          "in basic block.\n");
2109
2110       destroy_bb_vec_info (bb_vinfo);
2111       return NULL;
2112     }
2113
2114   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2115
2116   /* Mark all the statements that we want to vectorize as pure SLP and
2117      relevant.  */
2118   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2119     {
2120       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2121       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2122     }
2123
2124   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2125     {
2126       if (dump_enabled_p ())
2127         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2128                          "not vectorized: unsupported alignment in basic "
2129                          "block.\n");
2130       destroy_bb_vec_info (bb_vinfo);
2131       return NULL;
2132     }
2133
2134   if (!vect_slp_analyze_operations (bb_vinfo))
2135     {
2136       if (dump_enabled_p ())
2137         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2138                          "not vectorized: bad operation in basic block.\n");
2139
2140       destroy_bb_vec_info (bb_vinfo);
2141       return NULL;
2142     }
2143
2144   /* Cost model: check if the vectorization is worthwhile.  */
2145   if (flag_vect_cost_model
2146       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2147     {
2148       if (dump_enabled_p ())
2149         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2150                          "not vectorized: vectorization is not "
2151                          "profitable.\n");
2152
2153       destroy_bb_vec_info (bb_vinfo);
2154       return NULL;
2155     }
2156
2157   if (dump_enabled_p ())
2158     dump_printf_loc (MSG_NOTE, vect_location,
2159                      "Basic block will be vectorized using SLP\n");
2160
2161   return bb_vinfo;
2162 }
2163
2164
2165 bb_vec_info
2166 vect_slp_analyze_bb (basic_block bb)
2167 {
2168   bb_vec_info bb_vinfo;
2169   int insns = 0;
2170   gimple_stmt_iterator gsi;
2171   unsigned int vector_sizes;
2172
2173   if (dump_enabled_p ())
2174     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2175
2176   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2177     {
2178       gimple stmt = gsi_stmt (gsi);
2179       if (!is_gimple_debug (stmt)
2180           && !gimple_nop_p (stmt)
2181           && gimple_code (stmt) != GIMPLE_LABEL)
2182         insns++;
2183     }
2184
2185   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2186     {
2187       if (dump_enabled_p ())
2188         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2189                          "not vectorized: too many instructions in "
2190                          "basic block.\n");
2191
2192       return NULL;
2193     }
2194
2195   /* Autodetect first vector size we try.  */
2196   current_vector_size = 0;
2197   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2198
2199   while (1)
2200     {
2201       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2202       if (bb_vinfo)
2203         return bb_vinfo;
2204
2205       destroy_bb_vec_info (bb_vinfo);
2206
2207       vector_sizes &= ~current_vector_size;
2208       if (vector_sizes == 0
2209           || current_vector_size == 0)
2210         return NULL;
2211
2212       /* Try the largest remaining vector size.  */
2213       current_vector_size = 1 << floor_log2 (vector_sizes);
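           /* VECTOR_SIZES is a bitmask of the sizes, in bytes, that the
              target supports; e.g., if 32 | 16 was advertised and the
              32-byte attempt just failed and was cleared, floor_log2 (16)
              == 4 selects 1 << 4 == 16 bytes for the next attempt.  */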
2214       if (dump_enabled_p ())
2215         dump_printf_loc (MSG_NOTE, vect_location,
2216                          "***** Re-trying analysis with "
2217                          "vector size %d\n", current_vector_size);
2218     }
2219 }
2220
2221
2222 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2223    the number of created vector stmts depends on the unrolling factor).
2224    However, the actual number of vector stmts for every SLP node depends on
2225    VF which is set later in vect_analyze_operations ().  Hence, SLP costs
2226    should be updated.  In this function we assume that the inside costs
2227    calculated in vect_model_xxx_cost are linear in ncopies.  */
2228
2229 void
2230 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2231 {
2232   unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2233   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2234   slp_instance instance;
2235   stmt_vector_for_cost body_cost_vec;
2236   stmt_info_for_cost *si;
2237   void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2238
2239   if (dump_enabled_p ())
2240     dump_printf_loc (MSG_NOTE, vect_location,
2241                      "=== vect_update_slp_costs_according_to_vf ===");
2242
2243   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2244     {
2245       /* We assume that costs are linear in ncopies.  */
2246       int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
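           /* E.g., with a final VF of 8 and an instance unrolling factor
              of 2, each cost recorded for the instance body is counted
              ncopies == 4 times below.  */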
2247
2248       /* Record the instance's instructions in the target cost model.
2249          This was delayed until here because the count of instructions
2250          isn't known beforehand.  */
2251       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2252
2253       FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2254         (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2255                               vinfo_for_stmt (si->stmt), si->misalign,
2256                               vect_body);
2257     }
2258 }
2259
2260
2261 /* For constant and loop invariant defs of SLP_NODE this function returns
2262    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2263    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2264    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2265    REDUC_INDEX is the index of the reduction operand in the statements, unless
2266    it is -1.  */
2267
2268 static void
2269 vect_get_constant_vectors (tree op, slp_tree slp_node,
2270                            vec<tree> *vec_oprnds,
2271                            unsigned int op_num, unsigned int number_of_vectors,
2272                            int reduc_index)
2273 {
2274   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2275   gimple stmt = stmts[0];
2276   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2277   unsigned nunits;
2278   tree vec_cst;
2279   tree *elts;
2280   unsigned j, number_of_places_left_in_vector;
2281   tree vector_type;
2282   tree vop;
2283   int group_size = stmts.length ();
2284   unsigned int vec_num, i;
2285   unsigned number_of_copies = 1;
2286   vec<tree> voprnds;
2287   voprnds.create (number_of_vectors);
2288   bool constant_p, is_store;
2289   tree neutral_op = NULL;
2290   enum tree_code code = gimple_expr_code (stmt);
2291   gimple def_stmt;
2292   struct loop *loop;
2293   gimple_seq ctor_seq = NULL;
2294
2295   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2296       && reduc_index != -1)
2297     {
2298       op_num = reduc_index - 1;
2299       op = gimple_op (stmt, reduc_index);
2300       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2301          we need either neutral operands or the original operands.  See
2302          get_initial_def_for_reduction() for details.  */
2303       switch (code)
2304         {
2305           case WIDEN_SUM_EXPR:
2306           case DOT_PROD_EXPR:
2307           case PLUS_EXPR:
2308           case MINUS_EXPR:
2309           case BIT_IOR_EXPR:
2310           case BIT_XOR_EXPR:
2311              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2312                neutral_op = build_real (TREE_TYPE (op), dconst0);
2313              else
2314                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2315
2316              break;
2317
2318           case MULT_EXPR:
2319              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2320                neutral_op = build_real (TREE_TYPE (op), dconst1);
2321              else
2322                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2323
2324              break;
2325
2326           case BIT_AND_EXPR:
2327             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2328             break;
2329
2330           case MAX_EXPR:
2331           case MIN_EXPR:
2332             def_stmt = SSA_NAME_DEF_STMT (op);
2333             loop = (gimple_bb (stmt))->loop_father;
2334             neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2335                                                 loop_preheader_edge (loop));
2336             break;
2337
2338           default:
2339             neutral_op = NULL;
2340         }
2341     }
2342
2343   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2344     {
2345       is_store = true;
2346       op = gimple_assign_rhs1 (stmt);
2347     }
2348   else
2349     is_store = false;
2350
2351   gcc_assert (op);
2352
2353   if (CONSTANT_CLASS_P (op))
2354     constant_p = true;
2355   else
2356     constant_p = false;
2357
2358   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2359   gcc_assert (vector_type);
2360   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2361
2362   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2363      created vectors. It is greater than 1 if unrolling is performed.
2364
2365      For example, we have two scalar operands, s1 and s2 (e.g., group of
2366      strided accesses of size two), while NUNITS is four (i.e., four scalars
2367      of this type can be packed in a vector).  The output vector will contain
2368      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2369      will be 2).
2370
2371      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2372      containing the operands.
2373
2374      For example, NUNITS is four as before, and the group size is 8
2375      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2376      {s5, s6, s7, s8}.  */
2377
2378   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
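       /* For the examples above: least_common_multiple (4, 2) / 2 == 2
          copies of {s1, s2} fill one vector, while
          least_common_multiple (4, 8) / 8 == 1, i.e., each scalar appears
          exactly once across the two created vectors.  */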
2379
2380   number_of_places_left_in_vector = nunits;
2381   elts = XALLOCAVEC (tree, nunits);
2382   for (j = 0; j < number_of_copies; j++)
2383     {
2384       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2385         {
2386           if (is_store)
2387             op = gimple_assign_rhs1 (stmt);
2388           else
2389             {
2390               switch (code)
2391                 {
2392                   case COND_EXPR:
2393                     if (op_num == 0 || op_num == 1)
2394                       {
2395                         tree cond = gimple_assign_rhs1 (stmt);
2396                         op = TREE_OPERAND (cond, op_num);
2397                       }
2398                     else
2399                       {
2400                         if (op_num == 2)
2401                           op = gimple_assign_rhs2 (stmt);
2402                         else
2403                           op = gimple_assign_rhs3 (stmt);
2404                       }
2405                     break;
2406
2407                   case CALL_EXPR:
2408                     op = gimple_call_arg (stmt, op_num);
2409                     break;
2410
2411                   case LSHIFT_EXPR:
2412                   case RSHIFT_EXPR:
2413                   case LROTATE_EXPR:
2414                   case RROTATE_EXPR:
2415                     op = gimple_op (stmt, op_num + 1);
2416                     /* Unlike the other binary operators, shifts/rotates have
2417                        the shift count being int, instead of the same type as
2418                        the lhs, so make sure the scalar is the right type if
2419                        we are dealing with vectors of
2420                        long long/long/short/char.  */
2421                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2422                       op = fold_convert (TREE_TYPE (vector_type), op);
2423                     break;
2424
2425                   default:
2426                     op = gimple_op (stmt, op_num + 1);
2427                     break;
2428                 }
2429             }
2430
2431           if (reduc_index != -1)
2432             {
2433               loop = (gimple_bb (stmt))->loop_father;
2434               def_stmt = SSA_NAME_DEF_STMT (op);
2435
2436               gcc_assert (loop);
2437
2438               /* Get the def before the loop.  In reduction chain we have only
2439                  one initial value.  */
2440               if ((j != (number_of_copies - 1)
2441                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2442                        && i != 0))
2443                   && neutral_op)
2444                 op = neutral_op;
2445               else
2446                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2447                                             loop_preheader_edge (loop));
2448             }
2449
2450           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2451           number_of_places_left_in_vector--;
2452           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2453             {
2454               if (CONSTANT_CLASS_P (op))
2455                 {
2456                   op = fold_unary (VIEW_CONVERT_EXPR,
2457                                    TREE_TYPE (vector_type), op);
2458                   gcc_assert (op && CONSTANT_CLASS_P (op));
2459                 }
2460               else
2461                 {
2462                   tree new_temp
2463                     = make_ssa_name (TREE_TYPE (vector_type), NULL);
2464                   gimple init_stmt;
2465                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2466                                op);               
2467                   init_stmt
2468                     = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2469                                                     new_temp, op, NULL_TREE);
2470                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2471                   op = new_temp;
2472                 }
2473             }
2474           elts[number_of_places_left_in_vector] = op;
2475           if (!CONSTANT_CLASS_P (op))
2476             constant_p = false;
2477
2478           if (number_of_places_left_in_vector == 0)
2479             {
2480               number_of_places_left_in_vector = nunits;
2481
2482               if (constant_p)
2483                 vec_cst = build_vector (vector_type, elts);
2484               else
2485                 {
2486                   vec<constructor_elt, va_gc> *v;
2487                   unsigned k;
2488                   vec_alloc (v, nunits);
2489                   for (k = 0; k < nunits; ++k)
2490                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2491                   vec_cst = build_constructor (vector_type, v);
2492                 }
2493               voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2494                                                     vector_type, NULL));
2495               if (ctor_seq != NULL)
2496                 {
2497                   gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2498                   gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2499                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2500                                                         GSI_SAME_STMT);
2501                   ctor_seq = NULL;
2502                 }
2503             }
2504         }
2505     }
2506
2507   /* Since the vectors are created in reverse order, we have to reverse
2508      them here.  */
2509   vec_num = voprnds.length ();
2510   for (j = vec_num; j != 0; j--)
2511     {
2512       vop = voprnds[j - 1];
2513       vec_oprnds->quick_push (vop);
2514     }
2515
2516   voprnds.release ();
2517
2518   /* If VF is greater than the unrolling factor needed for the SLP group
2519      of stmts, the NUMBER_OF_VECTORS to be created is greater than
2520      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, so we have to
2521      replicate the vectors accordingly.  */
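       /* For example, if two vectors were built but four are needed: for
          a reduction with a neutral operand we push two vectors filled
          with the neutral value, otherwise we re-push copies of the
          vectors already created.  */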
2522   while (number_of_vectors > vec_oprnds->length ())
2523     {
2524       tree neutral_vec = NULL;
2525
2526       if (neutral_op)
2527         {
2528           if (!neutral_vec)
2529             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2530
2531           vec_oprnds->quick_push (neutral_vec);
2532         }
2533       else
2534         {
2535           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2536             vec_oprnds->quick_push (vop);
2537         }
2538     }
2539 }
2540
2541
2542 /* Get vectorized definitions from SLP_NODE that contains corresponding
2543    vectorized def-stmts.  */
2544
2545 static void
2546 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2547 {
2548   tree vec_oprnd;
2549   gimple vec_def_stmt;
2550   unsigned int i;
2551
2552   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2553
2554   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2555     {
2556       gcc_assert (vec_def_stmt);
2557       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2558       vec_oprnds->quick_push (vec_oprnd);
2559     }
2560 }
2561
2562
2563 /* Get vectorized definitions for SLP_NODE.
2564    If the scalar definitions are loop invariants or constants, collect them and
2565    call vect_get_constant_vectors() to create vector stmts.
2566    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2567    must be stored in the corresponding child of SLP_NODE, and we call
2568    vect_get_slp_vect_defs () to retrieve them.  */
2569
2570 void
2571 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2572                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2573 {
2574   gimple first_stmt;
2575   int number_of_vects = 0, i;
2576   unsigned int child_index = 0;
2577   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2578   slp_tree child = NULL;
2579   vec<tree> vec_defs;
2580   tree oprnd;
2581   bool vectorized_defs;
2582
2583   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2584   FOR_EACH_VEC_ELT (ops, i, oprnd)
2585     {
2586       /* For each operand we check if it has vectorized definitions in a child
2587          node or we need to create them (for invariants and constants).  We
2588          check if the LHS of the first stmt of the next child matches OPRND.
2589          If it does, we found the correct child.  Otherwise, we call
2590         vect_get_constant_vectors () and do not advance CHILD_INDEX, so
2591         that this child node is checked again for the next operand.  */
2592       vectorized_defs = false;
2593       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2594         {
2595           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2596
2597           /* We have to check both pattern and original def, if available.  */
2598           gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2599           gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2600
2601           if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2602               || (related
2603                   && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2604             {
2605               /* The number of vector defs is determined by the number of
2606                  vector statements in the node from which we get those
2607                  statements.  */
2608               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2609               vectorized_defs = true;
2610               child_index++;
2611             }
2612         }
2613
2614       if (!vectorized_defs)
2615         {
2616           if (i == 0)
2617             {
2618               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2619               /* Number of vector stmts was calculated according to LHS in
2620                  vect_schedule_slp_instance (), fix it by replacing LHS with
2621                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2622                  details.  */
2623               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2624                                              &rhs_size_unit);
2625               if (rhs_size_unit != lhs_size_unit)
2626                 {
2627                   number_of_vects *= rhs_size_unit;
2628                   number_of_vects /= lhs_size_unit;
2629                 }
2630             }
2631         }
2632
2633       /* Allocate memory for vectorized defs.  */
2634       vec_defs = vNULL;
2635       vec_defs.create (number_of_vects);
2636
2637       /* For reduction defs we call vect_get_constant_vectors (), since we are
2638          looking for initial loop invariant values.  */
2639       if (vectorized_defs && reduc_index == -1)
2640         /* The defs are already vectorized.  */
2641         vect_get_slp_vect_defs (child, &vec_defs);
2642       else
2643         /* Build vectors from scalar defs.  */
2644         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2645                                    number_of_vects, reduc_index);
2646
2647       vec_oprnds->quick_push (vec_defs);
2648
2649       /* For reductions, we only need initial values.  */
2650       if (reduc_index != -1)
2651         return;
2652     }
2653 }
2654
2655
2656 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2657    building a vector of type MASK_TYPE from it) and two input vectors placed in
2658    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2659    shifting by STRIDE elements of DR_CHAIN for every copy.
2660    (STRIDE is the number of vectorized stmts for NODE divided by the number of
2661    copies).
2662    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2663    the created stmts must be inserted.  */
2664
2665 static inline void
2666 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2667                            tree mask, int first_vec_indx, int second_vec_indx,
2668                            gimple_stmt_iterator *gsi, slp_tree node,
2669                            tree vectype, vec<tree> dr_chain,
2670                            int ncopies, int vect_stmts_counter)
2671 {
2672   tree perm_dest;
2673   gimple perm_stmt = NULL;
2674   stmt_vec_info next_stmt_info;
2675   int i, stride;
2676   tree first_vec, second_vec, data_ref;
2677
2678   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
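       /* E.g., a node with four vector stmts and ncopies == 2 gives a
          stride of 2: copy I of permutation number C is stored below in
          slot STRIDE * I + C of SLP_TREE_VEC_STMTS.  */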
2679
2680   /* Initialize the vect stmts of NODE to properly insert the generated
2681      stmts later.  */
2682   for (i = SLP_TREE_VEC_STMTS (node).length ();
2683        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2684     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2685
2686   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2687   for (i = 0; i < ncopies; i++)
2688     {
2689       first_vec = dr_chain[first_vec_indx];
2690       second_vec = dr_chain[second_vec_indx];
2691
2692       /* Generate the permute statement.  */
2693       perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2694                                                 first_vec, second_vec, mask);
2695       data_ref = make_ssa_name (perm_dest, perm_stmt);
2696       gimple_set_lhs (perm_stmt, data_ref);
2697       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2698
2699       /* Store the vector statement in NODE.  */
2700       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2701
2702       first_vec_indx += stride;
2703       second_vec_indx += stride;
2704     }
2705
2706   /* Mark the scalar stmt as vectorized.  */
2707   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2708   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2709 }
2710
2711
2712 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2713    return in CURRENT_MASK_ELEMENT its equivalent in target specific
2714    representation.  Check that the mask is valid and return FALSE if not.
2715    Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2716    the next vector, i.e., the current first vector is not needed.  */
2717
2718 static bool
2719 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2720                        int mask_nunits, bool only_one_vec, int index,
2721                        unsigned char *mask, int *current_mask_element,
2722                        bool *need_next_vector, int *number_of_mask_fixes,
2723                        bool *mask_fixed, bool *needs_first_vector)
2724 {
2725   int i;
2726
2727   /* Convert to target specific representation.  */
2728   *current_mask_element = first_mask_element + m;
2729   /* Adjust the value in case it's a mask for second and third vectors.  */
2730   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2731
2732   if (*current_mask_element < mask_nunits)
2733     *needs_first_vector = true;
2734
2735   /* We have only one input vector to permute but the mask accesses values in
2736      the next vector as well.  */
2737   if (only_one_vec && *current_mask_element >= mask_nunits)
2738     {
2739       if (dump_enabled_p ())
2740         {
2741           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2742                            "permutation requires at least two vectors ");
2743           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2744         }
2745
2746       return false;
2747     }
2748
2749   /* The mask requires the next vector.  */
2750   if (*current_mask_element >= mask_nunits * 2)
2751     {
2752       if (*needs_first_vector || *mask_fixed)
2753         {
2754           /* We either need the first vector too or have already moved to the
2755              next vector. In both cases, this permutation needs three
2756              vectors.  */
2757           if (dump_enabled_p ())
2758             {
2759               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2760                                "permutation requires at "
2761                                "least three vectors ");
2762               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2763             }
2764
2765           return false;
2766         }
2767
2768       /* We move to the next vector, dropping the first one and working with
2769          the second and the third - we need to adjust the values of the mask
2770          accordingly.  */
2771       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2772
2773       for (i = 0; i < index; i++)
2774         mask[i] -= mask_nunits * *number_of_mask_fixes;
2775
2776       (*number_of_mask_fixes)++;
2777       *mask_fixed = true;
2778     }
2779
2780   *need_next_vector = *mask_fixed;
2781
2782   /* This was the last element of this mask. Start a new one.  */
2783   if (index == mask_nunits - 1)
2784     {
2785       *number_of_mask_fixes = 1;
2786       *mask_fixed = false;
2787       *needs_first_vector = false;
2788     }
2789
2790   return true;
2791 }
2792
2793
2794 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2795    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2796    permute statements for the SLP node NODE of the SLP instance
2797    SLP_NODE_INSTANCE.  */
2798
2799 bool
2800 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2801                               gimple_stmt_iterator *gsi, int vf,
2802                               slp_instance slp_node_instance, bool analyze_only)
2803 {
2804   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2805   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2806   tree mask_element_type = NULL_TREE, mask_type;
2807   int i, j, k, nunits, vec_index = 0, scalar_index;
2808   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2809   gimple next_scalar_stmt;
2810   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2811   int first_mask_element;
2812   int index, unroll_factor, current_mask_element, ncopies;
2813   unsigned char *mask;
2814   bool only_one_vec = false, need_next_vector = false;
2815   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2816   int number_of_mask_fixes = 1;
2817   bool mask_fixed = false;
2818   bool needs_first_vector = false;
2819   enum machine_mode mode;
2820
2821   mode = TYPE_MODE (vectype);
2822
2823   if (!can_vec_perm_p (mode, false, NULL))
2824     {
2825       if (dump_enabled_p ())
2826         {
2827           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2828                            "no vect permute for ");
2829           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2830         }
2831       return false;
2832     }
2833
2834   /* The generic VEC_PERM_EXPR code always uses an integral type of the
2835      same size as the vector element being permuted.  */
2836   mask_element_type = lang_hooks.types.type_for_mode
2837                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2838   mask_type = get_vectype_for_scalar_type (mask_element_type);
2839   nunits = TYPE_VECTOR_SUBPARTS (vectype);
2840   mask = XALLOCAVEC (unsigned char, nunits);
2841   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2842
2843   /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2844      unrolling factor.  */
2845   orig_vec_stmts_num
2846     = group_size * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2847   if (orig_vec_stmts_num == 1)
2848     only_one_vec = true;
2849
2850   /* Number of copies is determined by the final vectorization factor
2851      relatively to SLP_NODE_INSTANCE unrolling factor.  */
2852   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
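       /* E.g., group_size == 3, an instance unrolling factor of 4 and
          nunits == 4 give orig_vec_stmts_num == 3, so more than one input
          vector is involved; with a final vf of 8, ncopies == 8 / 4 == 2
          and every permutation is emitted twice.  */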
2853
2854   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2855     return false;
2856
2857   /* Generate permutation masks for every NODE.  Number of masks for each
2858      NODE is equal to GROUP_SIZE.
2859      E.g., we have a group of three nodes with three loads from the same
2860      location in each node, and the vector size is 4.  I.e., we have an
2861      a0b0c0a1b1c1... sequence and we need to create the following vectors:
2862      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2863      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2864      ...
2865
2866      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2867      The last mask is illegal since we assume two operands for permute
2868      operation, and the mask element values can't be outside that range.
2869      Hence, the last mask must be converted into {2,5,5,5}.
2870      For the first two permutations we need the first and the second input
2871      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2872      we need the second and the third vectors: {b1,c1,a2,b2} and
2873      {c2,a3,b3,c3}.  */
2874
2875     {
2876       scalar_index = 0;
2877       index = 0;
2878       vect_stmts_counter = 0;
2879       vec_index = 0;
2880       first_vec_index = vec_index++;
2881       if (only_one_vec)
2882         second_vec_index = first_vec_index;
2883       else
2884         second_vec_index = vec_index++;
2885
2886       for (j = 0; j < unroll_factor; j++)
2887         {
2888           for (k = 0; k < group_size; k++)
2889             {
2890               i = SLP_TREE_LOAD_PERMUTATION (node)[k];
2891               first_mask_element = i + j * group_size;
2892               if (!vect_get_mask_element (stmt, first_mask_element, 0,
2893                                           nunits, only_one_vec, index,
2894                                           mask, &current_mask_element,
2895                                           &need_next_vector,
2896                                           &number_of_mask_fixes, &mask_fixed,
2897                                           &needs_first_vector))
2898                 return false;
2899               mask[index++] = current_mask_element;
2900
2901               if (index == nunits)
2902                 {
2903                   index = 0;
2904                   if (!can_vec_perm_p (mode, false, mask))
2905                     {
2906                       if (dump_enabled_p ())
2907                         {
2908                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2909                                            vect_location, 
2910                                            "unsupported vect permute { ");
2911                           for (i = 0; i < nunits; ++i)
2912                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2913                                          mask[i]);
2914                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2915                         }
2916                       return false;
2917                     }
2918
2919                   if (!analyze_only)
2920                     {
2921                       int l;
2922                       tree mask_vec, *mask_elts;
2923                       mask_elts = XALLOCAVEC (tree, nunits);
2924                       for (l = 0; l < nunits; ++l)
2925                         mask_elts[l] = build_int_cst (mask_element_type,
2926                                                       mask[l]);
2927                       mask_vec = build_vector (mask_type, mask_elts);
2928
2929                       if (need_next_vector)
2930                         {
2931                           first_vec_index = second_vec_index;
2932                           second_vec_index = vec_index;
2933                         }
2934
2935                       next_scalar_stmt
2936                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2937
2938                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
2939                                mask_vec, first_vec_index, second_vec_index,
2940                                gsi, node, vectype, dr_chain,
2941                                ncopies, vect_stmts_counter++);
2942                     }
2943                 }
2944             }
2945         }
2946     }
2947
2948   return true;
2949 }
2950
2951
2953 /* Vectorize SLP instance tree in postorder.  */
2954
2955 static bool
2956 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2957                             unsigned int vectorization_factor)
2958 {
2959   gimple stmt;
2960   bool grouped_store, is_store;
2961   gimple_stmt_iterator si;
2962   stmt_vec_info stmt_info;
2963   unsigned int vec_stmts_size, nunits, group_size;
2964   tree vectype;
2965   int i;
2966   slp_tree child;
2967
2968   if (!node)
2969     return false;
2970
2971   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2972     vect_schedule_slp_instance (child, instance, vectorization_factor);
2973
2974   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2975   stmt_info = vinfo_for_stmt (stmt);
2976
2977   /* VECTYPE is the type of the destination.  */
2978   vectype = STMT_VINFO_VECTYPE (stmt_info);
2979   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2980   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2981
2982   /* For each SLP instance calculate number of vector stmts to be created
2983      for the scalar stmts in each node of the SLP tree.  Number of vector
2984      elements in one vector iteration is the number of scalar elements in
2985      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2986      size.  */
2987   vec_stmts_size = (vectorization_factor * group_size) / nunits;
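  /* For example (illustrative numbers only): with GROUP_SIZE = 2, a
     vectorization factor of 4 and NUNITS = 4, each node needs
     (4 * 2) / 4 = 2 vector statements.  */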
2988
2989   if (!SLP_TREE_VEC_STMTS (node).exists ())
2990     {
2991       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
2992       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2993     }
2994
2995   if (dump_enabled_p ())
2996     {
2997       dump_printf_loc (MSG_NOTE, vect_location,
2998                        "------>vectorizing SLP node starting from: ");
2999       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3000     }
3001
3002   /* Loads should be inserted before the first load.  */
3003   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3004       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3005       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3006       && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3007     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3008   else if (is_pattern_stmt_p (stmt_info))
3009     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3010   else
3011     si = gsi_for_stmt (stmt);
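  /* E.g. (a sketch with assumed statements): for the SLP loads
     "x_1 = a[1]; y_2 = a[0];" the permuted vector load is emitted at
     the first scalar load of the chain, so that it dominates every
     scalar use it is going to replace.  */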
3012
3013   /* Stores should be inserted just before the last store.  */
3014   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3015       && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3016     {
3017       gimple last_store = vect_find_last_store_in_slp_instance (instance);
3018       if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3019         last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3020       si = gsi_for_stmt (last_store);
3021     }
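  /* E.g. (a sketch with assumed statements): for the store group
     "a[0] = x_1; a[1] = y_2;" the single vector store replacing it is
     emitted at "a[1] = y_2;", the last scalar store, so that both x_1
     and y_2 are already defined at the insertion point.  */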
3022
3023   /* Mark the first element of the reduction chain as reduction to properly
3024      transform the node.  In the analysis phase only the last element of the
3025      chain is marked as reduction.  */
3026   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3027       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3028     {
3029       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3030       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3031     }
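  /* A reduction chain has the shape (sketch):
       s_1 = x_1 + s_0;
       s_2 = x_2 + s_1;
       s_3 = x_3 + s_2;
     where only s_3, the last element, was marked as a reduction during
     analysis; the first element is patched up here so that
     vect_transform_stmt handles the whole chain as a reduction.  */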
3032
3033   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3034   return is_store;
3035 }
3036
3037 /* Replace each scalar call in SLP node NODE by an assignment of zero to its
3038    lhs.  For loop vectorization this is done in vectorizable_call, but for
3039    SLP it needs to be deferred until the end of vect_schedule_slp, because
3040    multiple SLP instances may refer to the same scalar stmt.  */
3041
3042 static void
3043 vect_remove_slp_scalar_calls (slp_tree node)
3044 {
3045   gimple stmt, new_stmt;
3046   gimple_stmt_iterator gsi;
3047   int i;
3048   slp_tree child;
3049   tree lhs;
3050   stmt_vec_info stmt_info;
3051
3052   if (!node)
3053     return;
3054
3055   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3056     vect_remove_slp_scalar_calls (child);
3057
3058   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3059     {
3060       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3061         continue;
3062       stmt_info = vinfo_for_stmt (stmt);
3063       if (stmt_info == NULL
3064           || is_pattern_stmt_p (stmt_info)
3065           || !PURE_SLP_STMT (stmt_info))
3066         continue;
3067       lhs = gimple_call_lhs (stmt);
3068       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3069       set_vinfo_for_stmt (new_stmt, stmt_info);
3070       set_vinfo_for_stmt (stmt, NULL);
3071       STMT_VINFO_STMT (stmt_info) = new_stmt;
3072       gsi = gsi_for_stmt (stmt);
3073       gsi_replace (&gsi, new_stmt, false);
3074       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3075     }
3076 }
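
/* For example (sketch): a scalar call "x_1 = foo (y_2);" whose result
   is only consumed by vectorized statements becomes "x_1 = 0;".  The
   dead assignment keeps the SSA web consistent and is trivially
   removed by a later DCE pass once no scalar uses remain.  */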
3077
3078 /* Generate vector code for all SLP instances in the loop/basic block.  */
3079
3080 bool
3081 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3082 {
3083   vec<slp_instance> slp_instances;
3084   slp_instance instance;
3085   unsigned int i, vf;
3086   bool is_store = false;
3087
3088   if (loop_vinfo)
3089     {
3090       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3091       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3092     }
3093   else
3094     {
3095       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3096       vf = 1;
3097     }
3098
3099   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3100     {
3101       /* Schedule the tree of INSTANCE.  */
3102       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3103                                              instance, vf);
3104       if (dump_enabled_p ())
3105         dump_printf_loc (MSG_NOTE, vect_location,
3106                          "vectorizing stmts using SLP.\n");
3107     }
3108
3109   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3110     {
3111       slp_tree root = SLP_INSTANCE_TREE (instance);
3112       gimple store;
3113       unsigned int j;
3114       gimple_stmt_iterator gsi;
3115
3116       /* Remove scalar call stmts.  Do not do this for basic-block
3117          vectorization as not all uses may be vectorized.
3118          ???  Why should this be necessary?  DCE should be able to
3119          remove the stmts itself.
3120          ???  For BB vectorization we can as well remove scalar
3121          stmts starting from the SLP tree root if they have no
3122          uses.  */
3123       if (loop_vinfo)
3124         vect_remove_slp_scalar_calls (root);
3125
3126       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3127                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3128         {
3129           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3130             break;
3131
3132           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3133             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3134           /* Free the attached stmt_vec_info and remove the stmt.  */
3135           gsi = gsi_for_stmt (store);
3136           unlink_stmt_vdef (store);
3137           gsi_remove (&gsi, true);
3138           release_defs (store);
3139           free_stmt_vec_info (store);
3140         }
3141     }
3142
3143   return is_store;
3144 }
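
/* Note (sketch, statements assumed): the loop above removes the scalar
   stores explicitly, e.g. both of "a[0] = x_1; a[1] = y_2;" once their
   vector replacement exists, because a store is not trivially dead for
   DCE the way an unused SSA definition is.  */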
3145
3146
3147 /* Vectorize the basic block.  */
3148
3149 void
3150 vect_slp_transform_bb (basic_block bb)
3151 {
3152   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3153   gimple_stmt_iterator si;
3154
3155   gcc_assert (bb_vinfo);
3156
3157   if (dump_enabled_p ())
3158     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3159
3160   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3161     {
3162       gimple stmt = gsi_stmt (si);
3163       stmt_vec_info stmt_info;
3164
3165       if (dump_enabled_p ())
3166         {
3167           dump_printf_loc (MSG_NOTE, vect_location,
3168                            "------>SLPing statement: ");
3169           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3170         }
3171
3172       stmt_info = vinfo_for_stmt (stmt);
3173       gcc_assert (stmt_info);
3174
3175       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3176       if (STMT_SLP_TYPE (stmt_info))
3177         {
3178           vect_schedule_slp (NULL, bb_vinfo);
3179           break;
3180         }
3181     }
3182
3183   if (dump_enabled_p ())
3184     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3185                      "BASIC BLOCK VECTORIZED\n");
3186
3187   destroy_bb_vec_info (bb_vinfo);
3188 }