/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the location of the first statement that has one, and
   UNKNOWN_LOC otherwise.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
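/* For illustration (a sketch, not part of the original sources): given
   the interleaving chain that loads a[0], a[1], a[2], a[3] and whose
   GROUP_FIRST_ELEMENT is the load of a[0],

       t0 = a[0];    <-- place 0 (FIRST_STMT)
       t1 = a[1];    <-- place 1
       t2 = a[2];    <-- place 2

   calling this function on the load of a[2] with that chain head
   returns 2, while a stmt whose chain head differs from FIRST_STMT
   yields -1.  */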

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */
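/* As a sketch of the matching (hypothetical stmts, not from the
   sources): for the group

       x0 = b[0] + c[0];
       x1 = b[1] + c[1];

   operand 0 of both stmts is defined by a load from B and operand 1
   by a load from C, both internal defs of the same type, so the defs
   of the second stmt match those recorded for the first and the
   def-stmts are queued in OPRNDS_INFO.  */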

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types ");

              return false;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}


/* Check whether the scalar stmts STMTS are isomorphic, do not require
   data permutation and are of supported operation types.  Return true
   if so; otherwise return false and indicate in *MATCHES which stmts
   are not isomorphic to the first one.  If MATCHES[0] is false this
   indicates that the comparison could not be carried out or that the
   stmts will never be vectorized by SLP.  */
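/* For example (a sketch, assuming simple array accesses): the group

       a[0] = b[0] + c[0];
       a[1] = b[1] + c[1];

   is isomorphic (same operation, compatible vector types), whereas in

       a[0] = b[0] + c[0];
       a[1] = b[1] - c[1];

   the second stmt uses a different operation, so MATCHES[1] is set to
   false while MATCHES[0] stays true.  */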

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Return FALSE if the def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return TRUE.  MATCHES records which scalar stmts failed to match
   the first one; if NULL, a local array is used instead.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes)
{
  unsigned nops, i, this_npermutes = 0;
  gimple stmt;

  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                        stmt, (i == 0), &oprnds_info))
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }
    }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches, npermutes))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build for operand zero failed and operands zero
         and one can be commuted, try that for the scalar stmts
         that failed the match.  */
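      /* A sketch of the idea (hypothetical stmts): if the group is

             x0 = b[0] + c[0];
             x1 = c[1] + b[1];

         the recursive build of operand zero fails for the second
         stmt, but since PLUS_EXPR is commutative, swapping the two
         defs of that stmt and re-trying lets the match succeed.  */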
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do this only if the number of unsuccessful permutes was no
             more than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          /* Swap mismatched definition stmts.  */
          for (unsigned j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
              }
          /* And try again ... */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   matches, npermutes))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */
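/* For instance (a sketch): with PERMUTATION = {2, 0, 1} the stmt at
   index 0 moves to slot 2, index 1 to slot 0 and index 2 to slot 1,
   since each stmt I is stored into tmp_stmts[permutation[I]].  */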

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (i = 0; i < group_size; ++i)
            dump_printf (MSG_NOTE, "%d ", i);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */
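  /* E.g. (a sketch): for a reduction with GROUP_SIZE 4 where every
     load node carries the permutation {3, 2, 1, 0}, the order of the
     reduction stmts does not matter, so the whole instance is
     rearranged according to {3, 2, 1, 0} and no runtime permutation
     needs to be generated.  */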

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
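  /* E.g. (a sketch): for the interleaving chain a[0], a[1], a[2], a[3],
     a node whose loads are a[1], a[2] forms a valid subchain, whereas
     a node loading a[0], a[2] does not, because each load in the node
     must be followed immediately by the next one in the chain.  */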
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
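  /* E.g. (a sketch): with GROUP_SIZE 2, the permutation {0, 0} in one
     load node and {1, 1} in the other (each node repeating a single
     dr) is accepted below, while {1, 0} within a node is rejected.  */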
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ???  Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
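  /* Worked example (a sketch): with NUNITS = 4 and GROUP_SIZE = 6,
     least_common_multiple (4, 6) is 12, so ncopies_for_cost is
     12 / 4 = 3 vector stmts per scalar stmt of the node.  */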

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                           instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
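  /* E.g. (a sketch): NUNITS = 4 and GROUP_SIZE = 2 give
     least_common_multiple (4, 2) / 2 = 2, i.e. the group would have
     to be unrolled twice to fill a vector, which the basic-block
     case below rejects.  */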
1465   if (unrolling_factor != 1 && !loop_vinfo)
1466     {
1467       if (dump_enabled_p ())
1468         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1469                          "Build SLP failed: unrolling required in basic"
1470                          " block SLP");
1471
1472       return false;
1473     }
1474
1475   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1476   scalar_stmts.create (group_size);
1477   next = stmt;
1478   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1479     {
1480       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1481       while (next)
1482         {
1483           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1484               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1485             scalar_stmts.safe_push (
1486                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1487           else
1488             scalar_stmts.safe_push (next);
1489           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1490         }
1491     }
1492   else
1493     {
1494       /* Collect reduction statements.  */
1495       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1496       for (i = 0; reductions.iterate (i, &next); i++)
1497         scalar_stmts.safe_push (next);
1498     }
1499
1500   node = vect_create_new_slp_node (scalar_stmts);
1501
1502   loads.create (group_size);
1503
1504   /* Build the tree for the SLP instance.  */
1505   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1506                            &max_nunits, &loads,
1507                            vectorization_factor, NULL, NULL))
1508     {
1509       /* Calculate the unrolling factor based on the smallest type.  */
1510       if (max_nunits > nunits)
1511         unrolling_factor = least_common_multiple (max_nunits, group_size)
1512                            / group_size;
1513
1514       if (unrolling_factor != 1 && !loop_vinfo)
1515         {
1516           if (dump_enabled_p ())
1517             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1518                              "Build SLP failed: unrolling required in basic"
1519                              " block SLP");
1520           vect_free_slp_tree (node);
1521           loads.release ();
1522           return false;
1523         }
1524
1525       /* Create a new SLP instance.  */
1526       new_instance = XNEW (struct _slp_instance);
1527       SLP_INSTANCE_TREE (new_instance) = node;
1528       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1529       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1530       SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1531       SLP_INSTANCE_LOADS (new_instance) = loads;
1532       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1533
1534       /* Compute the load permutation.  */
1535       slp_tree load_node;
1536       bool loads_permuted = false;
1537       FOR_EACH_VEC_ELT (loads, i, load_node)
1538         {
1539           vec<unsigned> load_permutation;
1540           int j;
1541           gimple load, first_stmt;
1542           bool this_load_permuted = false;
1543           load_permutation.create (group_size);
1544           first_stmt = GROUP_FIRST_ELEMENT
1545               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1546           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1547             {
1548               int load_place
1549                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1550               gcc_assert (load_place != -1);
1551               if (load_place != j)
1552                 this_load_permuted = true;
1553               load_permutation.safe_push (load_place);
1554             }
1555           if (!this_load_permuted)
1556             {
1557               load_permutation.release ();
1558               continue;
1559             }
1560           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1561           loads_permuted = true;
1562         }
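      /* E.g. a load node whose scalar stmts appear as { a[i+1], a[i] }
         relative to an interleaving chain starting at a[i] gets the
         permutation { 1, 0 } recorded above, while loads already in chain
         order yield the identity and keep no permutation vector.  */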
1563
1564       if (loads_permuted)
1565         {
1566           if (!vect_supported_load_permutation_p (new_instance))
1567             {
1568               if (dump_enabled_p ())
1569                 {
1570                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1571                                    "Build SLP failed: unsupported load "
1572                                    "permutation ");
1573                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1574                 }
1575               vect_free_slp_instance (new_instance);
1576               return false;
1577             }
1578
1579           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1580             = vect_find_first_load_in_slp_instance (new_instance);
1581         }
1582
1583       /* Compute the costs of this SLP instance.  */
1584       vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1585                              new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1586
1587       if (loop_vinfo)
1588         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1589       else
1590         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1591
1592       if (dump_enabled_p ())
1593         vect_print_slp_tree (MSG_NOTE, node);
1594
1595       return true;
1596     }
1597
1598   /* Failed to SLP.  */
1599   /* Free the allocated memory.  */
1600   vect_free_slp_tree (node);
1601   loads.release ();
1602
1603   return false;
1604 }
1605
1606
1607 /* Check whether stmts in the loop can be vectorized using SLP.  Build SLP
1608    trees of packed scalar stmts if SLP is possible.  */
1609
1610 bool
1611 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1612 {
1613   unsigned int i;
1614   vec<gimple> grouped_stores;
1615   vec<gimple> reductions = vNULL;
1616   vec<gimple> reduc_chains = vNULL;
1617   gimple first_element;
1618   bool ok = false;
1619
1620   if (dump_enabled_p ())
1621     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
1622
1623   if (loop_vinfo)
1624     {
1625       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1626       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1627       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1628     }
1629   else
1630     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1631
1632   /* Find SLP sequences starting from groups of grouped stores.  */
1633   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1634     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1635       ok = true;
1636
1637   if (bb_vinfo && !ok)
1638     {
1639       if (dump_enabled_p ())
1640         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1641                          "Failed to SLP the basic block.");
1642
1643       return false;
1644     }
1645
1646   if (loop_vinfo
1647       && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1648     {
1649       /* Find SLP sequences starting from reduction chains.  */
1650       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1651         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1652           ok = true;
1653         else
1654           return false;
1655
1656       /* Don't try to vectorize SLP reductions if reduction chain was
1657          detected.  */
1658       return ok;
1659     }
1660
1661   /* Find SLP sequences starting from groups of reductions.  */
1662   if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1663       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1664     ok = true;
1665
1666   return true;
1667 }
1668
1669
1670 /* For each possible SLP instance decide whether to SLP it and calculate overall
1671    unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
1672    least one instance.  */
1673
1674 bool
1675 vect_make_slp_decision (loop_vec_info loop_vinfo)
1676 {
1677   unsigned int i, unrolling_factor = 1;
1678   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1679   slp_instance instance;
1680   int decided_to_slp = 0;
1681
1682   if (dump_enabled_p ())
1683     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1684
1685   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1686     {
1687       /* FORNOW: SLP if you can.  */
1688       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1689         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1690
1691       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1692          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1693          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1694       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1695       decided_to_slp++;
1696     }
1697
1698   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1699
1700   if (decided_to_slp && dump_enabled_p ())
1701     dump_printf_loc (MSG_NOTE, vect_location,
1702                      "Decided to SLP %d instances. Unrolling factor %d",
1703                      decided_to_slp, unrolling_factor);
1704
1705   return (decided_to_slp > 0);
1706 }
1707
1708
1709 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1710    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
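
/* For example, given

     x_1 = b[i] * 2;
     a[i] = x_1;
     s_3 = s_2 + x_1;

   where the stores to a[] form an SLP instance but the summation into S
   is a loop-based reduction, X_1 has a relevant use outside the SLP tree,
   so its defining statement must be vectorized both ways and is marked
   HYBRID.  */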
1711
1712 static void
1713 vect_detect_hybrid_slp_stmts (slp_tree node)
1714 {
1715   int i;
1716   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1717   gimple stmt = stmts[0];
1718   imm_use_iterator imm_iter;
1719   gimple use_stmt;
1720   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1721   slp_tree child;
1722   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1723   struct loop *loop = NULL;
1724   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1725   basic_block bb = NULL;
1726
1727   if (!node)
1728     return;
1729
1730   if (loop_vinfo)
1731     loop = LOOP_VINFO_LOOP (loop_vinfo);
1732   else
1733     bb = BB_VINFO_BB (bb_vinfo);
1734
1735   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1736     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1737         && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1738       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1739         if (gimple_bb (use_stmt)
1740             && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1741                  || bb == gimple_bb (use_stmt))
1742             && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1743             && !STMT_SLP_TYPE (stmt_vinfo)
1744             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1745                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1746             && !(gimple_code (use_stmt) == GIMPLE_PHI
1747                  && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1748                   == vect_reduction_def))
1749           vect_mark_slp_stmts (node, hybrid, i);
1750
1751   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1752     vect_detect_hybrid_slp_stmts (child);
1753 }
1754
1755
1756 /* Find stmts that must be both vectorized and SLPed.  */
1757
1758 void
1759 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1760 {
1761   unsigned int i;
1762   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1763   slp_instance instance;
1764
1765   if (dump_enabled_p ())
1766     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1767
1768   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1769     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1770 }
1771
1772
1773 /* Create and initialize a new bb_vec_info struct for BB, as well as
1774    stmt_vec_info structs for all the stmts in it.  */
1775
1776 static bb_vec_info
1777 new_bb_vec_info (basic_block bb)
1778 {
1779   bb_vec_info res = NULL;
1780   gimple_stmt_iterator gsi;
1781
1782   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1783   BB_VINFO_BB (res) = bb;
1784
1785   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1786     {
1787       gimple stmt = gsi_stmt (gsi);
1788       gimple_set_uid (stmt, 0);
1789       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1790     }
1791
1792   BB_VINFO_GROUPED_STORES (res).create (10);
1793   BB_VINFO_SLP_INSTANCES (res).create (2);
1794   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1795
1796   bb->aux = res;
1797   return res;
1798 }
1799
1800
1801 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1802    stmts in the basic block.  */
1803
1804 static void
1805 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1806 {
1807   vec<slp_instance> slp_instances;
1808   slp_instance instance;
1809   basic_block bb;
1810   gimple_stmt_iterator si;
1811   unsigned i;
1812
1813   if (!bb_vinfo)
1814     return;
1815
1816   bb = BB_VINFO_BB (bb_vinfo);
1817
1818   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1819     {
1820       gimple stmt = gsi_stmt (si);
1821       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1822
1823       if (stmt_info)
1824         /* Free stmt_vec_info.  */
1825         free_stmt_vec_info (stmt);
1826     }
1827
1828   free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1829   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1830   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1831   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1832   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1833     vect_free_slp_instance (instance);
1834   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1835   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1836   free (bb_vinfo);
1837   bb->aux = NULL;
1838 }
1839
1840
1841 /* Analyze statements contained in SLP tree node after recursively analyzing
1842    the subtree. Return TRUE if the operations are supported.  */
1843
1844 static bool
1845 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1846 {
1847   bool dummy;
1848   int i;
1849   gimple stmt;
1850   slp_tree child;
1851
1852   if (!node)
1853     return true;
1854
1855   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1856     if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1857       return false;
1858
1859   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1860     {
1861       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1862       gcc_assert (stmt_info);
1863       gcc_assert (PURE_SLP_STMT (stmt_info));
1864
1865       if (!vect_analyze_stmt (stmt, &dummy, node))
1866         return false;
1867     }
1868
1869   return true;
1870 }
1871
1872
1873 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
1874    operations are supported. */
1875
1876 static bool
1877 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1878 {
1879   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1880   slp_instance instance;
1881   int i;
1882
1883   for (i = 0; slp_instances.iterate (i, &instance); )
1884     {
1885       if (!vect_slp_analyze_node_operations (bb_vinfo,
1886                                              SLP_INSTANCE_TREE (instance)))
1887         {
1888           vect_free_slp_instance (instance);
1889           slp_instances.ordered_remove (i);
1890         }
1891       else
1892         i++;
1893     }
1894
1895   if (!slp_instances.length ())
1896     return false;
1897
1898   return true;
1899 }
1900
1901
1902 /* Compute the scalar cost of the SLP node NODE and its children
1903    and return it.  Do not account defs that are marked in LIFE and
1904    update LIFE according to uses of NODE.  */
1905
1906 static unsigned
1907 vect_bb_slp_scalar_cost (slp_tree node, vec<bool, va_stack> life)
1908 {
1909   unsigned scalar_cost = 0;
1910   unsigned i;
1911   gimple stmt;
1912   slp_tree child;
1913
1914   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1915     {
1916       unsigned stmt_cost;
1917       ssa_op_iter op_iter;
1918       def_operand_p def_p;
1919       stmt_vec_info stmt_info;
1920
1921       if (life[i])
1922         continue;
1923
1924       /* If there is a non-vectorized use of the defs then the scalar
1925          stmt is kept live in which case we do not account it or any
1926          required defs in the SLP children in the scalar cost.  This
1927          way we make the vectorization more costly when compared to
1928          the scalar cost.  */
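      /* E.g. if the def of a grouped load here also feeds a statement that
         is not vectorizable, the load stays live in scalar form and brings
         no saving, so its cost is left out of SCALAR_COST.  */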
1929       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
1930         {
1931           imm_use_iterator use_iter;
1932           gimple use_stmt;
1933           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
1934             if (!vinfo_for_stmt (use_stmt)
1935                 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
1936               {
1937                 life[i] = true;
1938                 BREAK_FROM_IMM_USE_STMT (use_iter);
1939               }
1940         }
1941       if (life[i])
1942         continue;
1943
1944       stmt_info = vinfo_for_stmt (stmt);
1945       if (STMT_VINFO_DATA_REF (stmt_info))
1946         {
1947           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1948             stmt_cost = vect_get_stmt_cost (scalar_load);
1949           else
1950             stmt_cost = vect_get_stmt_cost (scalar_store);
1951         }
1952       else
1953         stmt_cost = vect_get_stmt_cost (scalar_stmt);
1954
1955       scalar_cost += stmt_cost;
1956     }
1957
1958   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1959     scalar_cost += vect_bb_slp_scalar_cost (child, life);
1960
1961   return scalar_cost;
1962 }
1963
1964 /* Check if vectorization of the basic block is profitable.  */
1965
1966 static bool
1967 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1968 {
1969   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1970   slp_instance instance;
1971   int i, j;
1972   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
1973   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
1974   void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
1975   stmt_vec_info stmt_info = NULL;
1976   stmt_vector_for_cost body_cost_vec;
1977   stmt_info_for_cost *ci;
1978
1979   /* Calculate vector costs.  */
1980   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1981     {
1982       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
1983
1984       FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
1985         {
1986           stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
1987           (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
1988                                 stmt_info, ci->misalign, vect_body);
1989         }
1990     }
1991
1992   /* Calculate scalar cost.  */
1993   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1994     {
1995       vec<bool, va_stack> life;
1996       vec_stack_alloc (bool, life, SLP_INSTANCE_GROUP_SIZE (instance));
1997       life.quick_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
1998       scalar_cost += vect_bb_slp_scalar_cost (SLP_INSTANCE_TREE (instance),
1999                                               life);
2000       life.release ();
2001     }
2002
2003   /* Complete the target-specific cost calculation.  */
2004   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2005                &vec_inside_cost, &vec_epilogue_cost);
2006
2007   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2008
2009   if (dump_enabled_p ())
2010     {
2011       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis:\n");
2012       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2013                    vec_inside_cost);
2014       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2015       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2016       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2017     }
2018
2019   /* Vectorization is profitable if its cost is less than the cost of scalar
2020      version.  */
2021   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2022     return false;
2023
2024   return true;
2025 }
2026
2027 /* Check if the basic block can be vectorized.  */
2028
2029 static bb_vec_info
2030 vect_slp_analyze_bb_1 (basic_block bb)
2031 {
2032   bb_vec_info bb_vinfo;
2033   vec<slp_instance> slp_instances;
2034   slp_instance instance;
2035   int i;
2036   int min_vf = 2;
2037
2038   bb_vinfo = new_bb_vec_info (bb);
2039   if (!bb_vinfo)
2040     return NULL;
2041
2042   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2043     {
2044       if (dump_enabled_p ())
2045         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2046                          "not vectorized: unhandled data-ref in basic "
2047                          "block.\n");
2048
2049       destroy_bb_vec_info (bb_vinfo);
2050       return NULL;
2051     }
2052
2053   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2054     {
2055       if (dump_enabled_p ())
2056         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2057                          "not vectorized: not enough data-refs in "
2058                          "basic block.\n");
2059
2060       destroy_bb_vec_info (bb_vinfo);
2061       return NULL;
2062     }
2063
2064   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2065     {
2066       if (dump_enabled_p ())
2067         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2068                          "not vectorized: unhandled data access in "
2069                          "basic block.\n");
2070
2071       destroy_bb_vec_info (bb_vinfo);
2072       return NULL;
2073     }
2074
2075   vect_pattern_recog (NULL, bb_vinfo);
2076
2077   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2078     {
2079       if (dump_enabled_p ())
2080         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2081                          "not vectorized: unhandled data dependence "
2082                          "in basic block.\n");
2083
2084       destroy_bb_vec_info (bb_vinfo);
2085       return NULL;
2086     }
2087
2088   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2089     {
2090       if (dump_enabled_p ())
2091         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2092                          "not vectorized: bad data alignment in basic "
2093                          "block.\n");
2094
2095       destroy_bb_vec_info (bb_vinfo);
2096       return NULL;
2097     }
2098
2099   /* Check the SLP opportunities in the basic block, analyze and build SLP
2100      trees.  */
2101   if (!vect_analyze_slp (NULL, bb_vinfo))
2102     {
2103       if (dump_enabled_p ())
2104         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2105                          "not vectorized: failed to find SLP opportunities "
2106                          "in basic block.\n");
2107
2108       destroy_bb_vec_info (bb_vinfo);
2109       return NULL;
2110     }
2111
2112   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2113
2114   /* Mark all the statements that we want to vectorize as pure SLP and
2115      relevant.  */
2116   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2117     {
2118       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2119       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2120     }
2121
2122   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2123     {
2124       if (dump_enabled_p ())
2125         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2126                          "not vectorized: unsupported alignment in basic "
2127                          "block.\n");
2128       destroy_bb_vec_info (bb_vinfo);
2129       return NULL;
2130     }
2131
2132   if (!vect_slp_analyze_operations (bb_vinfo))
2133     {
2134       if (dump_enabled_p ())
2135         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2136                          "not vectorized: bad operation in basic block.\n");
2137
2138       destroy_bb_vec_info (bb_vinfo);
2139       return NULL;
2140     }
2141
2142   /* Cost model: check if the vectorization is worthwhile.  */
2143   if (flag_vect_cost_model
2144       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2145     {
2146       if (dump_enabled_p ())
2147         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2148                          "not vectorized: vectorization is not "
2149                          "profitable.\n");
2150
2151       destroy_bb_vec_info (bb_vinfo);
2152       return NULL;
2153     }
2154
2155   if (dump_enabled_p ())
2156     dump_printf_loc (MSG_NOTE, vect_location,
2157                      "Basic block will be vectorized using SLP\n");
2158
2159   return bb_vinfo;
2160 }
2161
2162
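/* Main entry for basic-block SLP: try vect_slp_analyze_bb_1 on BB with
   each supported vector size, largest first, and return the bb_vec_info
   for the first size that succeeds, or NULL if none does.  */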
2163 bb_vec_info
2164 vect_slp_analyze_bb (basic_block bb)
2165 {
2166   bb_vec_info bb_vinfo;
2167   int insns = 0;
2168   gimple_stmt_iterator gsi;
2169   unsigned int vector_sizes;
2170
2171   if (dump_enabled_p ())
2172     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2173
2174   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2175     {
2176       gimple stmt = gsi_stmt (gsi);
2177       if (!is_gimple_debug (stmt)
2178           && !gimple_nop_p (stmt)
2179           && gimple_code (stmt) != GIMPLE_LABEL)
2180         insns++;
2181     }
2182
2183   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2184     {
2185       if (dump_enabled_p ())
2186         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2187                          "not vectorized: too many instructions in "
2188                          "basic block.\n");
2189
2190       return NULL;
2191     }
2192
2193   /* Autodetect first vector size we try.  */
2194   current_vector_size = 0;
2195   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
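  /* The target hook returns the supported sizes as a bitmask of byte
     counts; e.g. a target with 32-byte and 16-byte vectors yields 32 | 16,
     and each retry below peels off the largest remaining bit.  */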
2196
2197   while (1)
2198     {
2199       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2200       if (bb_vinfo)
2201         return bb_vinfo;
2202
2203       destroy_bb_vec_info (bb_vinfo);
2204
2205       vector_sizes &= ~current_vector_size;
2206       if (vector_sizes == 0
2207           || current_vector_size == 0)
2208         return NULL;
2209
2210       /* Try the next biggest vector size.  */
2211       current_vector_size = 1 << floor_log2 (vector_sizes);
2212       if (dump_enabled_p ())
2213         dump_printf_loc (MSG_NOTE, vect_location,
2214                          "***** Re-trying analysis with "
2215                          "vector size %d\n", current_vector_size);
2216     }
2217 }
2218
2219
2220 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2221    the number of created vector stmts depends on the unrolling factor).
2222    However, the actual number of vector stmts for every SLP node depends on
2223    VF which is set later in vect_analyze_operations ().  Hence, SLP costs
2224    should be updated.  In this function we assume that the inside costs
2225    calculated in vect_model_xxx_cost are linear in ncopies.  */
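
/* E.g. an instance with unrolling factor 2 in a loop whose final VF is 8
   needs NCOPIES == 8 / 2 == 4, so each cost recorded in its body cost
   vector is charged four times.  */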
2226
2227 void
2228 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2229 {
2230   unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2231   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2232   slp_instance instance;
2233   stmt_vector_for_cost body_cost_vec;
2234   stmt_info_for_cost *si;
2235   void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2236
2237   if (dump_enabled_p ())
2238     dump_printf_loc (MSG_NOTE, vect_location,
2239                      "=== vect_update_slp_costs_according_to_vf ===");
2240
2241   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2242     {
2243       /* We assume that costs are linear in ncopies.  */
2244       int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2245
2246       /* Record the instance's instructions in the target cost model.
2247          This was delayed until here because the count of instructions
2248          isn't known beforehand.  */
2249       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2250
2251       FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2252         (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2253                               vinfo_for_stmt (si->stmt), si->misalign,
2254                               vect_body);
2255     }
2256 }
2257
2258
2259 /* For constant and loop invariant defs of SLP_NODE this function returns
2260    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2261    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2262    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2263    REDUC_INDEX is the index of the reduction operand in the statements, unless
2264    it is -1.  */
2265
2266 static void
2267 vect_get_constant_vectors (tree op, slp_tree slp_node,
2268                            vec<tree> *vec_oprnds,
2269                            unsigned int op_num, unsigned int number_of_vectors,
2270                            int reduc_index)
2271 {
2272   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2273   gimple stmt = stmts[0];
2274   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2275   unsigned nunits;
2276   tree vec_cst;
2277   tree *elts;
2278   unsigned j, number_of_places_left_in_vector;
2279   tree vector_type;
2280   tree vop;
2281   int group_size = stmts.length ();
2282   unsigned int vec_num, i;
2283   unsigned number_of_copies = 1;
2284   vec<tree> voprnds;
2285   voprnds.create (number_of_vectors);
2286   bool constant_p, is_store;
2287   tree neutral_op = NULL;
2288   enum tree_code code = gimple_expr_code (stmt);
2289   gimple def_stmt;
2290   struct loop *loop;
2291   gimple_seq ctor_seq = NULL;
2292
2293   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2294       && reduc_index != -1)
2295     {
2296       op_num = reduc_index - 1;
2297       op = gimple_op (stmt, reduc_index);
2298       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2299          we need either neutral operands or the original operands.  See
2300          get_initial_def_for_reduction() for details.  */
2301       switch (code)
2302         {
2303           case WIDEN_SUM_EXPR:
2304           case DOT_PROD_EXPR:
2305           case PLUS_EXPR:
2306           case MINUS_EXPR:
2307           case BIT_IOR_EXPR:
2308           case BIT_XOR_EXPR:
2309              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2310                neutral_op = build_real (TREE_TYPE (op), dconst0);
2311              else
2312                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2313
2314              break;
2315
2316           case MULT_EXPR:
2317              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2318                neutral_op = build_real (TREE_TYPE (op), dconst1);
2319              else
2320                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2321
2322              break;
2323
2324           case BIT_AND_EXPR:
2325             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2326             break;
2327
2328           case MAX_EXPR:
2329           case MIN_EXPR:
2330             def_stmt = SSA_NAME_DEF_STMT (op);
2331             loop = (gimple_bb (stmt))->loop_father;
2332             neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2333                                                 loop_preheader_edge (loop));
2334             break;
2335
2336           default:
2337             neutral_op = NULL;
2338         }
2339     }
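
  /* For a PLUS_EXPR reduction on integers, for instance, NEUTRAL_OP is 0:
     the extra unrolled copies built below are seeded with {0, 0, ...} and
     only one copy carries the real initial value from the preheader.  */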
2340
2341   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2342     {
2343       is_store = true;
2344       op = gimple_assign_rhs1 (stmt);
2345     }
2346   else
2347     is_store = false;
2348
2349   gcc_assert (op);
2350
2351   if (CONSTANT_CLASS_P (op))
2352     constant_p = true;
2353   else
2354     constant_p = false;
2355
2356   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2357   gcc_assert (vector_type);
2358   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2359
2360   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2361      created vectors. It is greater than 1 if unrolling is performed.
2362
2363      For example, we have two scalar operands, s1 and s2 (e.g., group of
2364      strided accesses of size two), while NUNITS is four (i.e., four scalars
2365      of this type can be packed in a vector).  The output vector will contain
2366      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2367      will be 2).
2368
2369      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2370      containing the operands.
2371
2372      For example, NUNITS is four as before, and the group size is 8
2373      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2374      {s5, s6, s7, s8}.  */
2375
2376   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2377
2378   number_of_places_left_in_vector = nunits;
2379   elts = XALLOCAVEC (tree, nunits);
2380   for (j = 0; j < number_of_copies; j++)
2381     {
2382       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2383         {
2384           if (is_store)
2385             op = gimple_assign_rhs1 (stmt);
2386           else
2387             {
2388               switch (code)
2389                 {
2390                   case COND_EXPR:
2391                     if (op_num == 0 || op_num == 1)
2392                       {
2393                         tree cond = gimple_assign_rhs1 (stmt);
2394                         op = TREE_OPERAND (cond, op_num);
2395                       }
2396                     else
2397                       {
2398                         if (op_num == 2)
2399                           op = gimple_assign_rhs2 (stmt);
2400                         else
2401                           op = gimple_assign_rhs3 (stmt);
2402                       }
2403                     break;
2404
2405                   case CALL_EXPR:
2406                     op = gimple_call_arg (stmt, op_num);
2407                     break;
2408
2409                   case LSHIFT_EXPR:
2410                   case RSHIFT_EXPR:
2411                   case LROTATE_EXPR:
2412                   case RROTATE_EXPR:
2413                     op = gimple_op (stmt, op_num + 1);
2414                     /* Unlike the other binary operators, shifts/rotates have
2415                        the shift count being int, instead of the same type as
2416                        the lhs, so make sure the scalar is the right type if
2417                        we are dealing with vectors of
2418                        long long/long/short/char.  */
2419                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2420                       op = fold_convert (TREE_TYPE (vector_type), op);
2421                     break;
2422
2423                   default:
2424                     op = gimple_op (stmt, op_num + 1);
2425                     break;
2426                 }
2427             }
2428
2429           if (reduc_index != -1)
2430             {
2431               loop = (gimple_bb (stmt))->loop_father;
2432               def_stmt = SSA_NAME_DEF_STMT (op);
2433
2434               gcc_assert (loop);
2435
2436               /* Get the def before the loop.  In reduction chain we have only
2437                  one initial value.  */
2438               if ((j != (number_of_copies - 1)
2439                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2440                        && i != 0))
2441                   && neutral_op)
2442                 op = neutral_op;
2443               else
2444                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2445                                             loop_preheader_edge (loop));
2446             }
2447
2448           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2449           number_of_places_left_in_vector--;
2450           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2451             {
2452               if (CONSTANT_CLASS_P (op))
2453                 {
2454                   op = fold_unary (VIEW_CONVERT_EXPR,
2455                                    TREE_TYPE (vector_type), op);
2456                   gcc_assert (op && CONSTANT_CLASS_P (op));
2457                 }
2458               else
2459                 {
2460                   tree new_temp
2461                     = make_ssa_name (TREE_TYPE (vector_type), NULL);
2462                   gimple init_stmt;
2463                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2464                                op);               
2465                   init_stmt
2466                     = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2467                                                     new_temp, op, NULL_TREE);
2468                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2469                   op = new_temp;
2470                 }
2471             }
2472           elts[number_of_places_left_in_vector] = op;
2473           if (!CONSTANT_CLASS_P (op))
2474             constant_p = false;
2475
2476           if (number_of_places_left_in_vector == 0)
2477             {
2478               number_of_places_left_in_vector = nunits;
2479
2480               if (constant_p)
2481                 vec_cst = build_vector (vector_type, elts);
2482               else
2483                 {
2484                   vec<constructor_elt, va_gc> *v;
2485                   unsigned k;
2486                   vec_alloc (v, nunits);
2487                   for (k = 0; k < nunits; ++k)
2488                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2489                   vec_cst = build_constructor (vector_type, v);
2490                 }
2491               voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2492                                                     vector_type, NULL));
2493               if (ctor_seq != NULL)
2494                 {
2495                   gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2496                   gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2497                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2498                                                         GSI_SAME_STMT);
2499                   ctor_seq = NULL;
2500                 }
2501             }
2502         }
2503     }
2504
2505   /* Since the vectors are created in the reverse order, we should invert
2506      them.  */
2507   vec_num = voprnds.length ();
2508   for (j = vec_num; j != 0; j--)
2509     {
2510       vop = voprnds[j - 1];
2511       vec_oprnds->quick_push (vop);
2512     }
2513
2514   voprnds.release ();
2515
2516   /* In case that VF is greater than the unrolling factor needed for the SLP
2517      group of stmts, NUMBER_OF_VECTORS to be created is greater than
2518      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2519      to replicate the vectors.  */
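  /* E.g. if four vector defs are required but only two were created above,
     either a neutral vector is pushed twice (for reductions) or the two
     existing vectors are pushed again in order.  */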
2520   while (number_of_vectors > vec_oprnds->length ())
2521     {
2522       tree neutral_vec = NULL;
2523
2524       if (neutral_op)
2525         {
2526           if (!neutral_vec)
2527             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2528
2529           vec_oprnds->quick_push (neutral_vec);
2530         }
2531       else
2532         {
2533           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2534             vec_oprnds->quick_push (vop);
2535         }
2536     }
2537 }
2538
2539
2540 /* Get vectorized definitions from SLP_NODE that contains corresponding
2541    vectorized def-stmts.  */
2542
2543 static void
2544 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2545 {
2546   tree vec_oprnd;
2547   gimple vec_def_stmt;
2548   unsigned int i;
2549
2550   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2551
2552   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2553     {
2554       gcc_assert (vec_def_stmt);
2555       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2556       vec_oprnds->quick_push (vec_oprnd);
2557     }
2558 }
2559
2560
2561 /* Get vectorized definitions for SLP_NODE.
2562    If the scalar definitions are loop invariants or constants, collect them and
2563    call vect_get_constant_vectors() to create vector stmts.
2564    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2565    must be stored in the corresponding child of SLP_NODE, and we call
2566    vect_get_slp_vect_defs () to retrieve them.  */
2567
2568 void
2569 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2570                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2571 {
2572   gimple first_stmt;
2573   int number_of_vects = 0, i;
2574   unsigned int child_index = 0;
2575   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2576   slp_tree child = NULL;
2577   vec<tree> vec_defs;
2578   tree oprnd;
2579   bool vectorized_defs;
2580
2581   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2582   FOR_EACH_VEC_ELT (ops, i, oprnd)
2583     {
2584       /* For each operand we check if it has vectorized definitions in a child
2585          node or we need to create them (for invariants and constants).  We
2586          check if the LHS of the first stmt of the next child matches OPRND.
2587          If it does, we found the correct child.  Otherwise, we call
2588          vect_get_constant_vectors () and do not advance CHILD_INDEX, so
2589          that this child node is checked again for the next operand.  */
2590       vectorized_defs = false;
2591       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2592         {
2593           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2594
2595           /* We have to check both pattern and original def, if available.  */
2596           gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2597           gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2598
2599           if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2600               || (related
2601                   && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2602             {
2603               /* The number of vector defs is determined by the number of
2604                  vector statements in the node from which we get those
2605                  statements.  */
2606               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2607               vectorized_defs = true;
2608               child_index++;
2609             }
2610         }
2611
2612       if (!vectorized_defs)
2613         {
2614           if (i == 0)
2615             {
2616               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2617               /* Number of vector stmts was calculated according to LHS in
2618                  vect_schedule_slp_instance (), fix it by replacing LHS with
2619                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2620                  details.  */
2621               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2622                                              &rhs_size_unit);
2623               if (rhs_size_unit != lhs_size_unit)
2624                 {
2625                   number_of_vects *= rhs_size_unit;
2626                   number_of_vects /= lhs_size_unit;
2627                 }
2628             }
2629         }
2630
2631       /* Allocate memory for vectorized defs.  */
2632       vec_defs = vNULL;
2633       vec_defs.create (number_of_vects);
2634
2635       /* For reduction defs we call vect_get_constant_vectors (), since we are
2636          looking for initial loop invariant values.  */
2637       if (vectorized_defs && reduc_index == -1)
2638         /* The defs are already vectorized.  */
2639         vect_get_slp_vect_defs (child, &vec_defs);
2640       else
2641         /* Build vectors from scalar defs.  */
2642         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2643                                    number_of_vects, reduc_index);
2644
2645       vec_oprnds->quick_push (vec_defs);
2646
2647       /* For reductions, we only need initial values.  */
2648       if (reduc_index != -1)
2649         return;
2650     }
2651 }
2652
2653
2654 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2655    building a vector of type MASK_TYPE from it) and two input vectors placed in
2656    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2657    shifting by STRIDE elements of DR_CHAIN for every copy.
2658    (STRIDE is the number of vectorized stmts for NODE divided by the number of
2659    copies).
2660    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2661    the created stmts must be inserted.  */
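
/* For instance, with four vector stmts in NODE and NCOPIES == 2, STRIDE is
   2: the first copy permutes DR_CHAIN[FIRST_VEC_INDX] and
   DR_CHAIN[SECOND_VEC_INDX], the second copy the vectors two slots further
   on, and the results land at positions VECT_STMTS_COUNTER and
   VECT_STMTS_COUNTER + STRIDE.  */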
2662
2663 static inline void
2664 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2665                            tree mask, int first_vec_indx, int second_vec_indx,
2666                            gimple_stmt_iterator *gsi, slp_tree node,
2667                            tree vectype, vec<tree> dr_chain,
2668                            int ncopies, int vect_stmts_counter)
2669 {
2670   tree perm_dest;
2671   gimple perm_stmt = NULL;
2672   stmt_vec_info next_stmt_info;
2673   int i, stride;
2674   tree first_vec, second_vec, data_ref;
2675
2676   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2677
2678   /* Initialize the vect stmts of NODE to properly insert the generated
2679      stmts later.  */
2680   for (i = SLP_TREE_VEC_STMTS (node).length ();
2681        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2682     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2683
2684   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2685   for (i = 0; i < ncopies; i++)
2686     {
2687       first_vec = dr_chain[first_vec_indx];
2688       second_vec = dr_chain[second_vec_indx];
2689
2690       /* Generate the permute statement.  */
2691       perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2692                                                 first_vec, second_vec, mask);
2693       data_ref = make_ssa_name (perm_dest, perm_stmt);
2694       gimple_set_lhs (perm_stmt, data_ref);
2695       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2696
2697       /* Store the vector statement in NODE.  */
2698       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2699
2700       first_vec_indx += stride;
2701       second_vec_indx += stride;
2702     }
2703
2704   /* Mark the scalar stmt as vectorized.  */
2705   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2706   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2707 }
2708
2709
2710 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2711    return in CURRENT_MASK_ELEMENT its equivalent in target specific
2712    representation.  Check that the mask is valid and return FALSE if not.
2713    Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2714    the next vector, i.e., the current first vector is not needed.  */
2715
2716 static bool
2717 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2718                        int mask_nunits, bool only_one_vec, int index,
2719                        unsigned char *mask, int *current_mask_element,
2720                        bool *need_next_vector, int *number_of_mask_fixes,
2721                        bool *mask_fixed, bool *needs_first_vector)
2722 {
2723   int i;
2724
2725   /* Convert to target specific representation.  */
2726   *current_mask_element = first_mask_element + m;
2727   /* Adjust the value in case it's a mask for second and third vectors.  */
2728   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2729
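  /* E.g. with MASK_NUNITS == 4, FIRST_MASK_ELEMENT == 6, M == 0 and no
     earlier mask fix (*NUMBER_OF_MASK_FIXES == 1), the element stays 6,
     i.e. lane 2 of the second input vector of the two-operand permute.  */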
2730   if (*current_mask_element < mask_nunits)
2731     *needs_first_vector = true;
2732
2733   /* We have only one input vector to permute but the mask accesses values in
2734      the next vector as well.  */
2735   if (only_one_vec && *current_mask_element >= mask_nunits)
2736     {
2737       if (dump_enabled_p ())
2738         {
2739           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2740                            "permutation requires at least two vectors ");
2741           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2742         }
2743
2744       return false;
2745     }
2746
2747   /* The mask requires the next vector.  */
2748   if (*current_mask_element >= mask_nunits * 2)
2749     {
2750       if (*needs_first_vector || *mask_fixed)
2751         {
2752           /* We either need the first vector too or have already moved to the
2753              next vector. In both cases, this permutation needs three
2754              vectors.  */
2755           if (dump_enabled_p ())
2756             {
2757               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2758                                "permutation requires at "
2759                                "least three vectors ");
2760               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2761             }
2762
2763           return false;
2764         }
2765
2766       /* We move to the next vector, dropping the first one and working with
2767          the second and the third - we need to adjust the values of the mask
2768          accordingly.  */
2769       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2770
2771       for (i = 0; i < index; i++)
2772         mask[i] -= mask_nunits * *number_of_mask_fixes;
2773
2774       (*number_of_mask_fixes)++;
2775       *mask_fixed = true;
2776     }
2777
2778   *need_next_vector = *mask_fixed;
2779
2780   /* This was the last element of this mask. Start a new one.  */
2781   if (index == mask_nunits - 1)
2782     {
2783       *number_of_mask_fixes = 1;
2784       *mask_fixed = false;
2785       *needs_first_vector = false;
2786     }
2787
2788   return true;
2789 }
2790
2791
2792 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2793    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2794    permute statements for the SLP node NODE of the SLP instance
2795    SLP_NODE_INSTANCE.  */
2796
2797 bool
2798 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2799                               gimple_stmt_iterator *gsi, int vf,
2800                               slp_instance slp_node_instance, bool analyze_only)
2801 {
2802   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2803   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2804   tree mask_element_type = NULL_TREE, mask_type;
2805   int i, j, k, nunits, vec_index = 0, scalar_index;
2806   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2807   gimple next_scalar_stmt;
2808   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2809   int first_mask_element;
2810   int index, unroll_factor, current_mask_element, ncopies;
2811   unsigned char *mask;
2812   bool only_one_vec = false, need_next_vector = false;
2813   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2814   int number_of_mask_fixes = 1;
2815   bool mask_fixed = false;
2816   bool needs_first_vector = false;
2817   enum machine_mode mode;
2818
2819   mode = TYPE_MODE (vectype);
2820
2821   if (!can_vec_perm_p (mode, false, NULL))
2822     {
2823       if (dump_enabled_p ())
2824         {
2825           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2826                            "no vect permute for ");
2827           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2828         }
2829       return false;
2830     }
2831
2832   /* The generic VEC_PERM_EXPR code always uses an integral type of the
2833      same size as the vector element being permuted.  */
2834   mask_element_type = lang_hooks.types.type_for_mode
2835                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2836   mask_type = get_vectype_for_scalar_type (mask_element_type);
2837   nunits = TYPE_VECTOR_SUBPARTS (vectype);
2838   mask = XALLOCAVEC (unsigned char, nunits);
2839   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2840
2841   /* The number of vector stmts to generate, based only on the
2842      SLP_NODE_INSTANCE unrolling factor.  */
2843   orig_vec_stmts_num = group_size *
2844                 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2845   if (orig_vec_stmts_num == 1)
2846     only_one_vec = true;
2847
2848   /* Number of copies is determined by the final vectorization factor
2849      relatively to SLP_NODE_INSTANCE unrolling factor.  */
2850   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2851
2852   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2853     return false;
2854
2855   /* Generate permutation masks for every NODE. Number of masks for each NODE
2856      is equal to GROUP_SIZE.
2857      E.g., we have a group of three nodes with three loads from the same
2858      location in each node, and the vector size is 4.  I.e., we have an
2859      a0b0c0a1b1c1... sequence and we need to create the following vectors:
2860      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2861      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2862      ...
2863
2864      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2865      The last mask is illegal since we assume two operands for permute
2866      operation, and the mask element values can't be outside that range.
2867      Hence, the last mask must be converted into {2,5,5,5}.
2868      For the first two permutations we need the first and the second input
2869      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2870      we need the second and the third vectors: {b1,c1,a2,b2} and
2871      {c2,a3,b3,c3}.  */
2872
2873     {
2874       scalar_index = 0;
2875       index = 0;
2876       vect_stmts_counter = 0;
2877       vec_index = 0;
2878       first_vec_index = vec_index++;
2879       if (only_one_vec)
2880         second_vec_index = first_vec_index;
2881       else
2882         second_vec_index = vec_index++;
2883
2884       for (j = 0; j < unroll_factor; j++)
2885         {
2886           for (k = 0; k < group_size; k++)
2887             {
2888               i = SLP_TREE_LOAD_PERMUTATION (node)[k];
2889               first_mask_element = i + j * group_size;
2890               if (!vect_get_mask_element (stmt, first_mask_element, 0,
2891                                           nunits, only_one_vec, index,
2892                                           mask, &current_mask_element,
2893                                           &need_next_vector,
2894                                           &number_of_mask_fixes, &mask_fixed,
2895                                           &needs_first_vector))
2896                 return false;
2897               mask[index++] = current_mask_element;
2898
2899               if (index == nunits)
2900                 {
2901                   index = 0;
2902                   if (!can_vec_perm_p (mode, false, mask))
2903                     {
2904                       if (dump_enabled_p ())
2905                         {
2906                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2907                                            vect_location, 
2908                                            "unsupported vect permute { ");
2909                           for (i = 0; i < nunits; ++i)
2910                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2911                                          mask[i]);
2912                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2913                         }
2914                       return false;
2915                     }
2916
2917                   if (!analyze_only)
2918                     {
2919                       int l;
2920                       tree mask_vec, *mask_elts;
2921                       mask_elts = XALLOCAVEC (tree, nunits);
2922                       for (l = 0; l < nunits; ++l)
2923                         mask_elts[l] = build_int_cst (mask_element_type,
2924                                                       mask[l]);
2925                       mask_vec = build_vector (mask_type, mask_elts);
2926
2927                       if (need_next_vector)
2928                         {
2929                           first_vec_index = second_vec_index;
2930                           second_vec_index = vec_index;
2931                         }
2932
2933                       next_scalar_stmt
2934                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2935
2936                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
2937                                mask_vec, first_vec_index, second_vec_index,
2938                                gsi, node, vectype, dr_chain,
2939                                ncopies, vect_stmts_counter++);
2940                     }
2941                 }
2942             }
2943         }
2944     }
2945
2946   return true;
2947 }
2948
2949
2950
2951 /* Vectorize SLP instance tree in postorder.  */
2952
2953 static bool
2954 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2955                             unsigned int vectorization_factor)
2956 {
2957   gimple stmt;
2958   bool grouped_store, is_store;
2959   gimple_stmt_iterator si;
2960   stmt_vec_info stmt_info;
2961   unsigned int vec_stmts_size, nunits, group_size;
2962   tree vectype;
2963   int i;
2964   slp_tree child;
2965
2966   if (!node)
2967     return false;
2968
2969   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2970     vect_schedule_slp_instance (child, instance, vectorization_factor);
2971
2972   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2973   stmt_info = vinfo_for_stmt (stmt);
2974
2975   /* VECTYPE is the type of the destination.  */
2976   vectype = STMT_VINFO_VECTYPE (stmt_info);
2977   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2978   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2979
2980   /* For each SLP instance calculate the number of vector stmts to be
2981      created for the scalar stmts in each node of the SLP tree.  It is
2982      the number of scalar elements in one scalar iteration (GROUP_SIZE)
2983      multiplied by VF, divided by the number of elements in a vector
2984      (NUNITS).  */
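  /* For example (illustrative numbers only): with GROUP_SIZE == 4,
     VF == 2 and NUNITS == 4 (say a V4SI vector on a 128-bit target),
     vec_stmts_size = (2 * 4) / 4 = 2 vector stmts per node.  */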
2985   vec_stmts_size = (vectorization_factor * group_size) / nunits;
2986
2987   if (!SLP_TREE_VEC_STMTS (node).exists ())
2988     {
2989       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
2990       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2991     }
2992
2993   if (dump_enabled_p ())
2994     {
2995       dump_printf_loc (MSG_NOTE, vect_location,
2996                        "------>vectorizing SLP node starting from: ");
2997       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2998     }
2999
3000   /* Loads should be inserted before the first load.  */
3001   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3002       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3003       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3004       && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3005     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3006   else if (is_pattern_stmt_p (stmt_info))
3007     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3008   else
3009     si = gsi_for_stmt (stmt);
3010
3011   /* Stores should be inserted just before the last store.  */
3012   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3013       && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3014     {
3015       gimple last_store = vect_find_last_store_in_slp_instance (instance);
3016       if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3017         last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3018       si = gsi_for_stmt (last_store);
3019     }
3020
3021   /* Mark the first element of the reduction chain as reduction to properly
3022      transform the node.  In the analysis phase only the last element of the
3023      chain is marked as reduction.  */
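  /* E.g. for a hypothetical reduction chain

         s_1 = s_0 + a[0];
         s_2 = s_1 + a[1];

     the analysis phase marks only the stmt defining s_2; the stmt
     defining s_1 is the GROUP_FIRST_ELEMENT fixed up here.  */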
3024   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3025       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3026     {
3027       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3028       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3029     }
3030
3031   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3032   return is_store;
3033 }
3034
3035 /* Replace the scalar calls in SLP node NODE by setting their lhs to zero.
3036    For loop vectorization this is done in vectorizable_call, but for SLP
3037    it needs to be deferred until the end of vect_schedule_slp, because
3038    multiple SLP instances may refer to the same scalar stmt.  */
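/* For example (the callee is arbitrary), a vectorized scalar call

       x_1 = sqrtf (a_2);

   is replaced by the trivially dead assignment

       x_1 = 0.0;

   which later DCE can remove; its stmt_vec_info is transferred to the
   new stmt so other SLP instances still find valid info.  */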
3039
3040 static void
3041 vect_remove_slp_scalar_calls (slp_tree node)
3042 {
3043   gimple stmt, new_stmt;
3044   gimple_stmt_iterator gsi;
3045   int i;
3046   slp_tree child;
3047   tree lhs;
3048   stmt_vec_info stmt_info;
3049
3050   if (!node)
3051     return;
3052
3053   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3054     vect_remove_slp_scalar_calls (child);
3055
3056   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3057     {
3058       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3059         continue;
3060       stmt_info = vinfo_for_stmt (stmt);
3061       if (stmt_info == NULL
3062           || is_pattern_stmt_p (stmt_info)
3063           || !PURE_SLP_STMT (stmt_info))
3064         continue;
3065       lhs = gimple_call_lhs (stmt);
3066       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3067       set_vinfo_for_stmt (new_stmt, stmt_info);
3068       set_vinfo_for_stmt (stmt, NULL);
3069       STMT_VINFO_STMT (stmt_info) = new_stmt;
3070       gsi = gsi_for_stmt (stmt);
3071       gsi_replace (&gsi, new_stmt, false);
3072       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3073     }
3074 }
3075
3076 /* Generate vector code for all SLP instances in the loop/basic block.  */
3077
3078 bool
3079 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3080 {
3081   vec<slp_instance> slp_instances;
3082   slp_instance instance;
3083   unsigned int i, vf;
3084   bool is_store = false;
3085
3086   if (loop_vinfo)
3087     {
3088       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3089       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3090     }
3091   else
3092     {
3093       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3094       vf = 1;
3095     }
3096
3097   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3098     {
3099       /* Schedule the tree of INSTANCE.  */
3100       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3101                                              instance, vf);
3102       if (dump_enabled_p ())
3103         dump_printf_loc (MSG_NOTE, vect_location,
3104                          "vectorizing stmts using SLP.\n");
3105     }
3106
3107   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3108     {
3109       slp_tree root = SLP_INSTANCE_TREE (instance);
3110       gimple store;
3111       unsigned int j;
3112       gimple_stmt_iterator gsi;
3113
3114       /* Remove scalar call stmts.  Do not do this for basic-block
3115          vectorization as not all uses may be vectorized.
3116          ???  Why should this be necessary?  DCE should be able to
3117          remove the stmts itself.
3118          ???  For BB vectorization we can as well remove scalar
3119          stmts starting from the SLP tree root if they have no
3120          uses.  */
3121       if (loop_vinfo)
3122         vect_remove_slp_scalar_calls (root);
3123
3124       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3125                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3126         {
3127           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3128             break;
3129
3130           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3131             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3132           /* Free the attached stmt_vec_info and remove the stmt.  */
3133           gsi = gsi_for_stmt (store);
3134           unlink_stmt_vdef (store);
3135           gsi_remove (&gsi, true);
3136           release_defs (store);
3137           free_stmt_vec_info (store);
3138         }
3139     }
3140
3141   return is_store;
3142 }
3143
3144
3145 /* Vectorize the basic block.  */
3146
3147 void
3148 vect_slp_transform_bb (basic_block bb)
3149 {
3150   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3151   gimple_stmt_iterator si;
3152
3153   gcc_assert (bb_vinfo);
3154
3155   if (dump_enabled_p ())
3156     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3157
3158   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3159     {
3160       gimple stmt = gsi_stmt (si);
3161       stmt_vec_info stmt_info;
3162
3163       if (dump_enabled_p ())
3164         {
3165           dump_printf_loc (MSG_NOTE, vect_location,
3166                            "------>SLPing statement: ");
3167           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3168         }
3169
3170       stmt_info = vinfo_for_stmt (stmt);
3171       gcc_assert (stmt_info);
3172
3173       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3174       if (STMT_SLP_TYPE (stmt_info))
3175         {
3176           vect_schedule_slp (NULL, bb_vinfo);
3177           break;
3178         }
3179     }
3180
3181   if (dump_enabled_p ())
3182     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3183                      "BASIC BLOCK VECTORIZED\n");
3184
3185   destroy_bb_vec_info (bb_vinfo);
3186 }