Delay base decl's alignment adjustment until transformation happens
gcc/tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2    Copyright (C) 2007-2013 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-pass.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "recog.h"              /* FIXME: for insn_data */
37 #include "optabs.h"
38 #include "tree-vectorizer.h"
39 #include "langhooks.h"
40
41 /* Extract the location of the basic block in the source code.
42    Return the basic block location if it succeeds and UNKNOWN_LOC if not.  */
43
44 LOC
45 find_bb_location (basic_block bb)
46 {
47   gimple stmt = NULL;
48   gimple_stmt_iterator si;
49
50   if (!bb)
51     return UNKNOWN_LOC;
52
53   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54     {
55       stmt = gsi_stmt (si);
56       if (gimple_location (stmt) != UNKNOWN_LOC)
57         return gimple_location (stmt);
58     }
59
60   return UNKNOWN_LOC;
61 }
62
63
64 /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
65
66 static void
67 vect_free_slp_tree (slp_tree node)
68 {
69   int i;
70   slp_tree child;
71
72   if (!node)
73     return;
74
75   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
76     vect_free_slp_tree (child);
77
78   SLP_TREE_CHILDREN (node).release ();
79   SLP_TREE_SCALAR_STMTS (node).release ();
80   SLP_TREE_VEC_STMTS (node).release ();
81   SLP_TREE_LOAD_PERMUTATION (node).release ();
82
83   free (node);
84 }
85
86
87 /* Free the memory allocated for the SLP instance.  */
88
89 void
90 vect_free_slp_instance (slp_instance instance)
91 {
92   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93   SLP_INSTANCE_LOADS (instance).release ();
94   SLP_INSTANCE_BODY_COST_VEC (instance).release ();
95   free (instance);
96 }
97
98
99 /* Create an SLP node for SCALAR_STMTS.  */
100
101 static slp_tree
102 vect_create_new_slp_node (vec<gimple> scalar_stmts)
103 {
104   slp_tree node;
105   gimple stmt = scalar_stmts[0];
106   unsigned int nops;
107
108   if (is_gimple_call (stmt))
109     nops = gimple_call_num_args (stmt);
110   else if (is_gimple_assign (stmt))
111     {
112       nops = gimple_num_ops (stmt) - 1;
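      /* The condition of a COND_EXPR is an embedded comparison; its two
         operands are collected separately, so one extra operand slot is
         needed.  */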
113       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114         nops++;
115     }
116   else
117     return NULL;
118
119   node = XNEW (struct _slp_tree);
120   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
121   SLP_TREE_VEC_STMTS (node).create (0);
122   SLP_TREE_CHILDREN (node).create (nops);
123   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
124
125   return node;
126 }
127
128
129 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
130    operand.  */
131 static vec<slp_oprnd_info> 
132 vect_create_oprnd_info (int nops, int group_size)
133 {
134   int i;
135   slp_oprnd_info oprnd_info;
136   vec<slp_oprnd_info> oprnds_info;
137
138   oprnds_info.create (nops);
139   for (i = 0; i < nops; i++)
140     {
141       oprnd_info = XNEW (struct _slp_oprnd_info);
142       oprnd_info->def_stmts.create (group_size);
143       oprnd_info->first_dt = vect_uninitialized_def;
144       oprnd_info->first_op_type = NULL_TREE;
145       oprnd_info->first_pattern = false;
146       oprnds_info.quick_push (oprnd_info);
147     }
148
149   return oprnds_info;
150 }
151
152
153 /* Free operands info.  */
154
155 static void
156 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
157 {
158   int i;
159   slp_oprnd_info oprnd_info;
160
161   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
162     {
163       oprnd_info->def_stmts.release ();
164       XDELETE (oprnd_info);
165     }
166
167   oprnds_info.release ();
168 }
169
170
171 /* Find the place of the data-ref in STMT in the interleaving chain that starts
172    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
173
174 static int
175 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
176 {
177   gimple next_stmt = first_stmt;
178   int result = 0;
179
180   if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
181     return -1;
182
183   do
184     {
185       if (next_stmt == stmt)
186         return result;
187       result++;
188       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
189     }
190   while (next_stmt);
191
192   return -1;
193 }
194
195
196 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
197    they are of a valid type and that they match the defs of the first stmt of
198    the SLP group (stored in OPRNDS_INFO).  */
199
200 static bool
201 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
202                              gimple stmt, bool first,
203                              vec<slp_oprnd_info> *oprnds_info)
204 {
205   tree oprnd;
206   unsigned int i, number_of_oprnds;
207   tree def;
208   gimple def_stmt;
209   enum vect_def_type dt = vect_uninitialized_def;
210   struct loop *loop = NULL;
211   bool pattern = false;
212   slp_oprnd_info oprnd_info;
213   int op_idx = 1;
214   tree compare_rhs = NULL_TREE;
215
216   if (loop_vinfo)
217     loop = LOOP_VINFO_LOOP (loop_vinfo);
218
219   if (is_gimple_call (stmt))
220     {
221       number_of_oprnds = gimple_call_num_args (stmt);
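          /* Call arguments start at gimple operand 3: operand 0 is the
             LHS, 1 the called function and 2 the static chain.  */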
222       op_idx = 3;
223     }
224   else if (is_gimple_assign (stmt))
225     {
226       number_of_oprnds = gimple_num_ops (stmt) - 1;
227       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
228         number_of_oprnds++;
229     }
230   else
231     return false;
232
233   for (i = 0; i < number_of_oprnds; i++)
234     {
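      /* If the previous iteration split an embedded comparison, hand
         out its stashed second operand first.  */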
235       if (compare_rhs)
236         {
237           oprnd = compare_rhs;
238           compare_rhs = NULL_TREE;
239         }
240       else
241         oprnd = gimple_op (stmt, op_idx++);
242
243       oprnd_info = (*oprnds_info)[i];
244
245       if (COMPARISON_CLASS_P (oprnd))
246         {
247           compare_rhs = TREE_OPERAND (oprnd, 1);
248           oprnd = TREE_OPERAND (oprnd, 0);
249         }
250
251       if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
252                                &def, &dt)
253           || (!def_stmt && dt != vect_constant_def))
254         {
255           if (dump_enabled_p ())
256             {
257               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
258                                "Build SLP failed: can't find def for ");
259               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
260             }
261
262           return false;
263         }
264
265       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
266          from the pattern.  Check that all the stmts of the node are in the
267          pattern.  */
268       if (def_stmt && gimple_bb (def_stmt)
269           && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
270               || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
271                   && gimple_code (def_stmt) != GIMPLE_PHI))
272           && vinfo_for_stmt (def_stmt)
273           && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
274           && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
275           && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
276         {
277           pattern = true;
278           if (!first && !oprnd_info->first_pattern)
279             {
280               if (dump_enabled_p ())
281                 {
282                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
283                                    "Build SLP failed: some of the stmts"
284                                    " are in a pattern, and others are not ");
285                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
286                 }
287
288               return false;
289             }
290
291           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
292           dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
293
294           if (dt == vect_unknown_def_type)
295             {
296               if (dump_enabled_p ())
297                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
298                                  "Unsupported pattern.");
299               return false;
300             }
301
302           switch (gimple_code (def_stmt))
303             {
304               case GIMPLE_PHI:
305                 def = gimple_phi_result (def_stmt);
306                 break;
307
308               case GIMPLE_ASSIGN:
309                 def = gimple_assign_lhs (def_stmt);
310                 break;
311
312               default:
313                 if (dump_enabled_p ())
314                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
315                                    "unsupported defining stmt: ");
316                 return false;
317             }
318         }
319
320       if (first)
321         {
322           oprnd_info->first_dt = dt;
323           oprnd_info->first_pattern = pattern;
324           oprnd_info->first_op_type = TREE_TYPE (oprnd);
325         }
326       else
327         {
328           /* Not first stmt of the group, check that the def-stmt/s match
329              the def-stmt/s of the first stmt.  Allow different definition
330              types for reduction chains: the first stmt must be a
331              vect_reduction_def (a phi node), and the rest
332              vect_internal_def.  */
333           if (((oprnd_info->first_dt != dt
334                 && !(oprnd_info->first_dt == vect_reduction_def
335                      && dt == vect_internal_def)
336                 && !((oprnd_info->first_dt == vect_external_def
337                       || oprnd_info->first_dt == vect_constant_def)
338                      && (dt == vect_external_def
339                          || dt == vect_constant_def)))
340                || !types_compatible_p (oprnd_info->first_op_type,
341                                        TREE_TYPE (oprnd))))
342             {
343               if (dump_enabled_p ())
344                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
345                                  "Build SLP failed: different types ");
346
347               return false;
348             }
349         }
350
351       /* Check the types of the definitions.  */
352       switch (dt)
353         {
354         case vect_constant_def:
355         case vect_external_def:
356         case vect_reduction_def:
357           break;
358
359         case vect_internal_def:
360           oprnd_info->def_stmts.quick_push (def_stmt);
361           break;
362
363         default:
364           /* FORNOW: Not supported.  */
365           if (dump_enabled_p ())
366             {
367               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
368                                "Build SLP failed: illegal type of def ");
369               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
370             }
371
372           return false;
373         }
374     }
375
376   return true;
377 }
378
379
380 /* Verify that the scalar stmts STMTS are isomorphic, do not require
381    data permutation, and are not of unsupported operation types.
382    Return true if so; otherwise return false and indicate in *MATCHES
383    which stmts are not isomorphic to the first one.  If MATCHES[0]
384    is false, the comparison could not be carried out or the stmts
385    will never be vectorized by SLP.  */
386
387 static bool
388 vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
389                        vec<gimple> stmts, unsigned int group_size,
390                        unsigned nops, unsigned int *max_nunits,
391                        unsigned int vectorization_factor, bool *matches)
392 {
393   unsigned int i;
394   gimple stmt = stmts[0];
395   enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
396   enum tree_code first_cond_code = ERROR_MARK;
397   tree lhs;
398   bool need_same_oprnds = false;
399   tree vectype, scalar_type, first_op1 = NULL_TREE;
400   optab optab;
401   int icode;
402   enum machine_mode optab_op2_mode;
403   enum machine_mode vec_mode;
404   struct data_reference *first_dr;
405   HOST_WIDE_INT dummy;
406   gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
407   tree cond;
408
409   /* For every stmt in NODE find its def stmt/s.  */
410   FOR_EACH_VEC_ELT (stmts, i, stmt)
411     {
412       matches[i] = false;
413
414       if (dump_enabled_p ())
415         {
416           dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
417           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
418         }
419
420       /* Fail to vectorize statements marked as unvectorizable.  */
421       if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
422         {
423           if (dump_enabled_p ())
424             {
425               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
426                                "Build SLP failed: unvectorizable statement ");
427               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
428             }
429           /* Fatal mismatch.  */
430           matches[0] = false;
431           return false;
432         }
433
434       lhs = gimple_get_lhs (stmt);
435       if (lhs == NULL_TREE)
436         {
437           if (dump_enabled_p ())
438             {
439               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
440                                "Build SLP failed: not GIMPLE_ASSIGN nor "
441                                "GIMPLE_CALL ");
442               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
443             }
444           /* Fatal mismatch.  */
445           matches[0] = false;
446           return false;
447         }
448
449        if (is_gimple_assign (stmt)
450            && gimple_assign_rhs_code (stmt) == COND_EXPR
451            && (cond = gimple_assign_rhs1 (stmt))
452            && !COMPARISON_CLASS_P (cond))
453         {
454           if (dump_enabled_p ())
455             {
456               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
457                                "Build SLP failed: condition is not "
458                                "comparison ");
459               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
460             }
461           /* Fatal mismatch.  */
462           matches[0] = false;
463           return false;
464         }
465
466       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
467       vectype = get_vectype_for_scalar_type (scalar_type);
468       if (!vectype)
469         {
470           if (dump_enabled_p ())
471             {
472               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
473                                "Build SLP failed: unsupported data-type ");
474               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
475                                  scalar_type);
476             }
477           /* Fatal mismatch.  */
478           matches[0] = false;
479           return false;
480         }
481
482       /* In case of multiple types we need to detect the smallest type.  */
483       if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
484         {
485           *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
486           if (bb_vinfo)
487             vectorization_factor = *max_nunits;
488         }
489
490       if (is_gimple_call (stmt))
491         {
492           rhs_code = CALL_EXPR;
493           if (gimple_call_internal_p (stmt)
494               || gimple_call_tail_p (stmt)
495               || gimple_call_noreturn_p (stmt)
496               || !gimple_call_nothrow_p (stmt)
497               || gimple_call_chain (stmt))
498             {
499               if (dump_enabled_p ())
500                 {
501                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
502                                    "Build SLP failed: unsupported call type ");
503                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
504                 }
505               /* Fatal mismatch.  */
506               matches[0] = false;
507               return false;
508             }
509         }
510       else
511         rhs_code = gimple_assign_rhs_code (stmt);
512
513       /* Check the operation.  */
514       if (i == 0)
515         {
516           first_stmt_code = rhs_code;
517
518           /* Shift arguments should be equal in all the packed stmts for a
519              vector shift with scalar shift operand.  */
520           if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
521               || rhs_code == LROTATE_EXPR
522               || rhs_code == RROTATE_EXPR)
523             {
524               vec_mode = TYPE_MODE (vectype);
525
526               /* First see if we have a vector/vector shift.  */
527               optab = optab_for_tree_code (rhs_code, vectype,
528                                            optab_vector);
529
530               if (!optab
531                   || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
532                 {
533                   /* No vector/vector shift, try for a vector/scalar shift.  */
534                   optab = optab_for_tree_code (rhs_code, vectype,
535                                                optab_scalar);
536
537                   if (!optab)
538                     {
539                       if (dump_enabled_p ())
540                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
541                                          "Build SLP failed: no optab.");
542                       /* Fatal mismatch.  */
543                       matches[0] = false;
544                       return false;
545                     }
546                   icode = (int) optab_handler (optab, vec_mode);
547                   if (icode == CODE_FOR_nothing)
548                     {
549                       if (dump_enabled_p ())
550                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
551                                          "Build SLP failed: "
552                                          "op not supported by target.");
553                       /* Fatal mismatch.  */
554                       matches[0] = false;
555                       return false;
556                     }
557                   optab_op2_mode = insn_data[icode].operand[2].mode;
558                   if (!VECTOR_MODE_P (optab_op2_mode))
559                     {
560                       need_same_oprnds = true;
561                       first_op1 = gimple_assign_rhs2 (stmt);
562                     }
563                 }
564             }
565           else if (rhs_code == WIDEN_LSHIFT_EXPR)
566             {
567               need_same_oprnds = true;
568               first_op1 = gimple_assign_rhs2 (stmt);
569             }
570         }
571       else
572         {
573           if (first_stmt_code != rhs_code
574               && (first_stmt_code != IMAGPART_EXPR
575                   || rhs_code != REALPART_EXPR)
576               && (first_stmt_code != REALPART_EXPR
577                   || rhs_code != IMAGPART_EXPR)
578               && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
579                    && (first_stmt_code == ARRAY_REF
580                        || first_stmt_code == BIT_FIELD_REF
581                        || first_stmt_code == INDIRECT_REF
582                        || first_stmt_code == COMPONENT_REF
583                        || first_stmt_code == MEM_REF)))
584             {
585               if (dump_enabled_p ())
586                 {
587                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
588                                    "Build SLP failed: different operation "
589                                    "in stmt ");
590                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
591                 }
592               /* Mismatch.  */
593               continue;
594             }
595
596           if (need_same_oprnds
597               && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
598             {
599               if (dump_enabled_p ())
600                 {
601                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
602                                    "Build SLP failed: different shift "
603                                    "arguments in ");
604                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
605                 }
606               /* Mismatch.  */
607               continue;
608             }
609
610           if (rhs_code == CALL_EXPR)
611             {
612               gimple first_stmt = stmts[0];
613               if (gimple_call_num_args (stmt) != nops
614                   || !operand_equal_p (gimple_call_fn (first_stmt),
615                                        gimple_call_fn (stmt), 0)
616                   || gimple_call_fntype (first_stmt)
617                      != gimple_call_fntype (stmt))
618                 {
619                   if (dump_enabled_p ())
620                     {
621                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
622                                        "Build SLP failed: different calls in ");
623                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
624                                         stmt, 0);
625                     }
626                   /* Mismatch.  */
627                   continue;
628                 }
629             }
630         }
631
632       /* Grouped store or load.  */
633       if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
634         {
635           if (REFERENCE_CLASS_P (lhs))
636             {
637               /* Store.  */
638               ;
639             }
640           else
641             {
642               /* Load.  */
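              /* The number of times the group must be repeated to fill
                 a whole number of vectors.  */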
643               unsigned unrolling_factor
644                 = least_common_multiple
645                     (*max_nunits, group_size) / group_size;
646               /* FORNOW: Check that there is no gap between the loads
647                  and no gap between the groups when we need to load
648                  multiple groups at once.
649                  ???  We should enhance this to only disallow gaps
650                  inside vectors.  */
651               if ((unrolling_factor > 1
652                    && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
653                    && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
654                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
655                       && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
656                 {
657                   if (dump_enabled_p ())
658                     {
659                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
660                                        "Build SLP failed: grouped "
661                                        "loads have gaps ");
662                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
663                                         stmt, 0);
664                     }
665                   /* Fatal mismatch.  */
666                   matches[0] = false;
667                   return false;
668                 }
669
670               /* Check that the size of the interleaved loads group is
671                  not greater than the SLP group size.  */
672               unsigned ncopies
673                 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
674               if (loop_vinfo
675                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
676                   && ((GROUP_SIZE (vinfo_for_stmt (stmt))
677                        - GROUP_GAP (vinfo_for_stmt (stmt)))
678                       > ncopies * group_size))
679                 {
680                   if (dump_enabled_p ())
681                     {
682                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
683                                        "Build SLP failed: the number "
684                                        "of interleaved loads is greater than "
685                                        "the SLP group size ");
686                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
687                                         stmt, 0);
688                     }
689                   /* Fatal mismatch.  */
690                   matches[0] = false;
691                   return false;
692                 }
693
694               old_first_load = first_load;
695               first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
696               if (prev_first_load)
697                 {
698                   /* Check that there are no loads from different interleaving
699                      chains in the same node.  */
700                   if (prev_first_load != first_load)
701                     {
702                       if (dump_enabled_p ())
703                         {
704                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
705                                            vect_location, 
706                                            "Build SLP failed: different "
707                                            "interleaving chains in one node ");
708                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
709                                             stmt, 0);
710                         }
711                       /* Mismatch.  */
712                       continue;
713                     }
714                 }
715               else
716                 prev_first_load = first_load;
717
718               /* In some cases a group of loads is just the same load
719                  repeated N times.  Only analyze its cost once.  */
720               if (first_load == stmt && old_first_load != first_load)
721                 {
722                   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
723                   if (vect_supportable_dr_alignment (first_dr, false)
724                       == dr_unaligned_unsupported)
725                     {
726                       if (dump_enabled_p ())
727                         {
728                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
729                                            vect_location, 
730                                            "Build SLP failed: unsupported "
731                                            "unaligned load ");
732                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
733                                             stmt, 0);
734                         }
735                       /* Fatal mismatch.  */
736                       matches[0] = false;
737                       return false;
738                     }
739                 }
740            }
741         } /* Grouped access.  */
742       else
743         {
744           if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
745             {
746               /* Non-grouped load.  */
747               if (dump_enabled_p ())
748                 {
749                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
750                                    "Build SLP failed: not grouped load ");
751                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
752                 }
753
754               /* FORNOW: Non-grouped loads are not supported.  */
755               /* Fatal mismatch.  */
756               matches[0] = false;
757               return false;
758             }
759
760           /* Not a memory operation.  */
761           if (TREE_CODE_CLASS (rhs_code) != tcc_binary
762               && TREE_CODE_CLASS (rhs_code) != tcc_unary
763               && rhs_code != COND_EXPR
764               && rhs_code != CALL_EXPR)
765             {
766               if (dump_enabled_p ())
767                 {
768                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
769                                    "Build SLP failed: operation");
770                   dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
771                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
772                 }
773               /* Fatal mismatch.  */
774               matches[0] = false;
775               return false;
776             }
777
778           if (rhs_code == COND_EXPR)
779             {
780               tree cond_expr = gimple_assign_rhs1 (stmt);
781
782               if (i == 0)
783                 first_cond_code = TREE_CODE (cond_expr);
784               else if (first_cond_code != TREE_CODE (cond_expr))
785                 {
786                   if (dump_enabled_p ())
787                     {
788                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
789                                        "Build SLP failed: different"
790                                        " operation");
791                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
792                                         stmt, 0);
793                     }
794                   /* Mismatch.  */
795                   continue;
796                 }
797             }
798         }
799
800       matches[i] = true;
801     }
802
803   for (i = 0; i < group_size; ++i)
804     if (!matches[i])
805       return false;
806
807   return true;
808 }
809
810 /* Recursively build an SLP tree starting from NODE.
811    Fail (and return FALSE) if def-stmts are not isomorphic, require
812    data permutation or are of unsupported types of operation.
813    Otherwise, return TRUE.  MATCHES records which scalar stmts (if any)
814    failed to match the first one, and NPERMUTES counts the operand
815    swaps tried so far.  */
816
817 static bool
818 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
819                      slp_tree *node, unsigned int group_size,
820                      unsigned int *max_nunits,
821                      vec<slp_tree> *loads,
822                      unsigned int vectorization_factor,
823                      bool *matches, unsigned *npermutes)
824 {
825   unsigned nops, i, this_npermutes = 0;
826   gimple stmt;
827
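  /* The top-level caller (vect_analyze_slp_instance) passes NULL for
     MATCHES and NPERMUTES; allocate local scratch state here that the
     recursive calls then share.  */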
828   if (!matches)
829     matches = XALLOCAVEC (bool, group_size);
830   if (!npermutes)
831     npermutes = &this_npermutes;
832
833   matches[0] = false;
834
835   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
836   if (is_gimple_call (stmt))
837     nops = gimple_call_num_args (stmt);
838   else if (is_gimple_assign (stmt))
839     {
840       nops = gimple_num_ops (stmt) - 1;
841       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
842         nops++;
843     }
844   else
845     return false;
846
847   if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
848                               SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
849                               max_nunits, vectorization_factor, matches))
850     return false;
851
852   /* If the SLP node is a load, terminate the recursion.  */
853   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
854       && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
855     {
856       loads->safe_push (*node);
857       return true;
858     }
859
860   /* Get at the operands, verifying they are compatible.  */
861   vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
862   slp_oprnd_info oprnd_info;
863   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
864     {
865       if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
866                                         stmt, (i == 0), &oprnds_info))
867         {
868           vect_free_oprnd_info (oprnds_info);
869           return false;
870         }
871     }
872
873   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
874
875   /* Create SLP_TREE nodes for the definition node/s.  */
876   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
877     {
878       slp_tree child;
879       unsigned old_nloads = loads->length ();
880       unsigned old_max_nunits = *max_nunits;
881
882       if (oprnd_info->first_dt != vect_internal_def)
883         continue;
884
885       child = vect_create_new_slp_node (oprnd_info->def_stmts);
886       if (!child)
887         {
888           vect_free_oprnd_info (oprnds_info);
889           return false;
890         }
891
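      /* Use a fresh MATCHES array for the child build so a failed
         attempt does not clobber this level's results.  */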
892       bool *matches = XALLOCAVEC (bool, group_size);
893       if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
894                                group_size, max_nunits, loads,
895                                vectorization_factor, matches, npermutes))
896         {
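          /* CHILD now owns DEF_STMTS as its scalar stmts; clear the
             vector so vect_free_oprnd_info does not release it again.  */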
897           oprnd_info->def_stmts = vNULL;
898           SLP_TREE_CHILDREN (*node).quick_push (child);
899           continue;
900         }
901
902       /* If the SLP build for operand zero failed and operands zero
903          and one can be commuted, try that for the scalar stmts
904          that failed the match.  */
905       if (i == 0
906           /* A first scalar stmt mismatch signals a fatal mismatch.  */
907           && matches[0]
908           /* ???  For COND_EXPRs we can swap the comparison operands
909              as well as the arms under some constraints.  */
910           && nops == 2
911           && oprnds_info[1]->first_dt == vect_internal_def
912           && is_gimple_assign (stmt)
913           && commutative_tree_code (gimple_assign_rhs_code (stmt))
914           /* Do so only if the number of unsuccessful permutes was no more
915              than a cut-off, as re-trying the recursive match on
916              possibly each level of the tree would expose exponential
917              behavior.  */
918           && *npermutes < 4)
919         {
920           /* Roll back.  */
921           *max_nunits = old_max_nunits;
922           loads->truncate (old_nloads);
923           /* Swap mismatched definition stmts.  */
924           for (unsigned j = 0; j < group_size; ++j)
925             if (!matches[j])
926               {
927                 gimple tem = oprnds_info[0]->def_stmts[j];
928                 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
929                 oprnds_info[1]->def_stmts[j] = tem;
930               }
931           /* And try again ... */
932           if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
933                                    group_size, max_nunits, loads,
934                                    vectorization_factor,
935                                    matches, npermutes))
936             {
937               oprnd_info->def_stmts = vNULL;
938               SLP_TREE_CHILDREN (*node).quick_push (child);
939               continue;
940             }
941
942           ++*npermutes;
943         }
944
945       oprnd_info->def_stmts = vNULL;
946       vect_free_slp_tree (child);
947       vect_free_oprnd_info (oprnds_info);
948       return false;
949     }
950
951   vect_free_oprnd_info (oprnds_info);
952   return true;
953 }
954
955 /* Dump an SLP tree NODE using flags specified in DUMP_KIND.  */
956
957 static void
958 vect_print_slp_tree (int dump_kind, slp_tree node)
959 {
960   int i;
961   gimple stmt;
962   slp_tree child;
963
964   if (!node)
965     return;
966
967   dump_printf (dump_kind, "node ");
968   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
969     {
970       dump_printf (dump_kind, "\n\tstmt %d ", i);
971       dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
972     }
973   dump_printf (dump_kind, "\n");
974
975   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
976     vect_print_slp_tree (dump_kind, child);
977 }
978
979
980 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
981    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
982    J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
983    stmts in NODE are to be marked.  */
984
985 static void
986 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
987 {
988   int i;
989   gimple stmt;
990   slp_tree child;
991
992   if (!node)
993     return;
994
995   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
996     if (j < 0 || i == j)
997       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
998
999   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1000     vect_mark_slp_stmts (child, mark, j);
1001 }
1002
1003
1004 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
1005
1006 static void
1007 vect_mark_slp_stmts_relevant (slp_tree node)
1008 {
1009   int i;
1010   gimple stmt;
1011   stmt_vec_info stmt_info;
1012   slp_tree child;
1013
1014   if (!node)
1015     return;
1016
1017   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1018     {
1019       stmt_info = vinfo_for_stmt (stmt);
1020       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1021                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1022       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1023     }
1024
1025   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1026     vect_mark_slp_stmts_relevant (child);
1027 }
1028
1029
1030 /* Rearrange the statements of NODE according to PERMUTATION.  */
1031
1032 static void
1033 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1034                           vec<unsigned> permutation)
1035 {
1036   gimple stmt;
1037   vec<gimple> tmp_stmts;
1038   unsigned int i;
1039   slp_tree child;
1040
1041   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1042     vect_slp_rearrange_stmts (child, group_size, permutation);
1043
1044   gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1045   tmp_stmts.create (group_size);
1046   tmp_stmts.quick_grow_cleared (group_size);
1047
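  /* The stmt at position I in the current order moves to position
     PERMUTATION[I] in the new order.  */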
1048   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1049     tmp_stmts[permutation[i]] = stmt;
1050
1051   SLP_TREE_SCALAR_STMTS (node).release ();
1052   SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1053 }
1054
1055
1056 /* Check if the required load permutations in the SLP instance
1057    SLP_INSTN are supported.  */
1058
1059 static bool
1060 vect_supported_load_permutation_p (slp_instance slp_instn)
1061 {
1062   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1063   unsigned int i, j, k, next;
1064   sbitmap load_index;
1065   slp_tree node;
1066   gimple stmt, load, next_load, first_load;
1067   struct data_reference *dr;
1068
1069   if (dump_enabled_p ())
1070     {
1071       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1072       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1073         if (node->load_permutation.exists ())
1074           FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1075             dump_printf (MSG_NOTE, "%d ", next);
1076         else
1077           for (k = 0; k < group_size; ++k)
1078             dump_printf (MSG_NOTE, "%d ", k);
1079     }
1080
1081   /* In case of reduction every load permutation is allowed, since the order
1082      of the reduction statements is not important (as opposed to the case of
1083      grouped stores).  The only condition we need to check is that all the
1084      load nodes are of the same size and have the same permutation (and then
1085      rearrange all the nodes of the SLP instance according to this 
1086      permutation).  */
1087
1088   /* Check that all the load nodes are of the same size.  */
1089   /* ???  Can't we assert this? */
1090   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1091     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1092       return false;
1093
1094   node = SLP_INSTANCE_TREE (slp_instn);
1095   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1096
1097   /* Reduction (there are no data-refs in the root).
1098      In a reduction chain the order of the loads is important.  */
1099   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1100       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1101     {
1102       slp_tree load;
1103       unsigned int lidx;
1104
1105       /* Compare all the permutation sequences to the first one.  We know
1106          that at least one load is permuted.  */
1107       node = SLP_INSTANCE_LOADS (slp_instn)[0];
1108       if (!node->load_permutation.exists ())
1109         return false;
1110       for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1111         {
1112           if (!load->load_permutation.exists ())
1113             return false;
1114           FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1115             if (lidx != node->load_permutation[j])
1116               return false;
1117         }
1118
1119       /* Check that the loads in the first sequence are different and there
1120          are no gaps between them.  */
1121       load_index = sbitmap_alloc (group_size);
1122       bitmap_clear (load_index);
1123       FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1124         {
1125           if (bitmap_bit_p (load_index, lidx))
1126             {
1127               sbitmap_free (load_index);
1128               return false;
1129             }
1130           bitmap_set_bit (load_index, lidx);
1131         }
1132       for (i = 0; i < group_size; i++)
1133         if (!bitmap_bit_p (load_index, i))
1134           {
1135             sbitmap_free (load_index);
1136             return false;
1137           }
1138       sbitmap_free (load_index);
1139
1140       /* This permutation is valid for reduction.  Since the order of the
1141          statements in the nodes is not important unless they are memory
1142          accesses, we can rearrange the statements in all the nodes
1143          according to the order of the loads.  */
1144       vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1145                                 node->load_permutation);
1146
1147       /* We are done, no actual permutations need to be generated.  */
1148       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1149         SLP_TREE_LOAD_PERMUTATION (node).release ();
1150       return true;
1151     }
1152
1153   /* In basic block vectorization we allow any subchain of an interleaving
1154      chain.
1155    FORNOW: not supported in loop SLP because of realignment complications.  */
1156   if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1157     {
1158       /* Check that for every node in the instance the loads
1159          form a subchain.  */
1160       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1161         {
1162           next_load = NULL;
1163           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1164             {
1165               if (j != 0 && next_load != load)
1166                 return false;
1167               next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1168             }
1169         }
1170
1171       /* Check that the alignment of the first load in every subchain, i.e.,
1172          the first statement in every load node, is supported.
1173          ???  This belongs in alignment checking.  */
1174       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1175         {
1176           first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1177           if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1178             {
1179               dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1180               if (vect_supportable_dr_alignment (dr, false)
1181                   == dr_unaligned_unsupported)
1182                 {
1183                   if (dump_enabled_p ())
1184                     {
1185                       dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1186                                        vect_location,
1187                                        "unsupported unaligned load ");
1188                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1189                                         first_load, 0);
1190                     }
1191                   return false;
1192                 }
1193             }
1194         }
1195
1196       /* We are done, no actual permutations need to be generated.  */
1197       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1198         SLP_TREE_LOAD_PERMUTATION (node).release ();
1199       return true;
1200     }
1201
1202   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1203      GROUP_SIZE, where each sequence of the same dr is also of GROUP_SIZE
1204      length (unless it is a reduction).  */
1205   if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
1206     return false;
1207   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1208     if (!node->load_permutation.exists ())
1209       return false;
1210
1211   load_index = sbitmap_alloc (group_size);
1212   bitmap_clear (load_index);
1213   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1214     {
1215       unsigned int lidx = node->load_permutation[0];
1216       if (bitmap_bit_p (load_index, lidx))
1217         {
1218           sbitmap_free (load_index);
1219           return false;
1220         }
1221       bitmap_set_bit (load_index, lidx);
1222       FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1223         if (k != lidx)
1224           {
1225             sbitmap_free (load_index);
1226             return false;
1227           }
1228     }
1229   for (i = 0; i < group_size; i++)
1230     if (!bitmap_bit_p (load_index, i))
1231       {
1232         sbitmap_free (load_index);
1233         return false;
1234       }
1235   sbitmap_free (load_index);
1236
1237   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1238     if (node->load_permutation.exists ()
1239         && !vect_transform_slp_perm_load
1240               (node, vNULL, NULL,
1241                SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1242       return false;
1243   return true;
1244 }
1245
1246
1247 /* Find the first load in the loop that belongs to INSTANCE.
1248    When loads are in several SLP nodes, there can be a case in which the first
1249    load does not appear in the first SLP node to be transformed, causing
1250    incorrect order of statements.  Since we generate all the loads together,
1251    they must be inserted before the first load of the SLP instance and not
1252    before the first load of the first node of the instance.  */
1253
1254 static gimple
1255 vect_find_first_load_in_slp_instance (slp_instance instance)
1256 {
1257   int i, j;
1258   slp_tree load_node;
1259   gimple first_load = NULL, load;
1260
1261   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1262     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1263       first_load = get_earlier_stmt (load, first_load);
1264
1265   return first_load;
1266 }
1267
1268
1269 /* Find the last store in SLP INSTANCE.  */
1270
1271 static gimple
1272 vect_find_last_store_in_slp_instance (slp_instance instance)
1273 {
1274   int i;
1275   slp_tree node;
1276   gimple last_store = NULL, store;
1277
1278   node = SLP_INSTANCE_TREE (instance);
1279   for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1280     last_store = get_later_stmt (store, last_store);
1281
1282   return last_store;
1283 }
1284
1285 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1286
1287 static void
1288 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1289                          slp_instance instance, slp_tree node,
1290                          stmt_vector_for_cost *prologue_cost_vec,
1291                          unsigned ncopies_for_cost)
1292 {
1293   stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1294
1295   unsigned i;
1296   slp_tree child;
1297   gimple stmt, s;
1298   stmt_vec_info stmt_info;
1299   tree lhs;
1300   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1301
1302   /* Recurse down the SLP tree.  */
1303   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1304     vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1305                              instance, child, prologue_cost_vec,
1306                              ncopies_for_cost);
1307
1308   /* Look at the first scalar stmt to determine the cost.  */
1309   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1310   stmt_info = vinfo_for_stmt (stmt);
1311   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1312     {
1313       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1314         vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1315                                vect_uninitialized_def,
1316                                node, prologue_cost_vec, body_cost_vec);
1317       else
1318         {
1319           int i;
1320           gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1321           vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1322                                 node, prologue_cost_vec, body_cost_vec);
1323           /* If the load is permuted record the cost for the permutation.
1324              ???  Loads from multiple chains are let through here only
1325              for a single special case involving complex numbers where
1326              in the end no permutation is necessary.  */
1327           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1328             if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1329                  == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1330                 && vect_get_place_in_interleaving_chain
1331                      (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1332               {
1333                 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1334                                   stmt_info, 0, vect_body);
1335                 break;
1336               }
1337         }
1338     }
1339   else
1340     record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1341                       stmt_info, 0, vect_body);
1342
1343   /* Scan operands and account for prologue cost of constants/externals.
1344      ???  This over-estimates cost for multiple uses and should be
1345      re-engineered.  */
1346   lhs = gimple_get_lhs (stmt);
1347   for (i = 0; i < gimple_num_ops (stmt); ++i)
1348     {
1349       tree def, op = gimple_op (stmt, i);
1350       gimple def_stmt;
1351       enum vect_def_type dt;
1352       if (!op || op == lhs)
1353         continue;
1354       if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1355                               &def_stmt, &def, &dt)
1356           && (dt == vect_constant_def || dt == vect_external_def))
1357         record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1358                           stmt_info, 0, vect_prologue);
1359     }
1360 }
1361
1362 /* Compute the cost for the SLP instance INSTANCE.  */
1363
1364 static void
1365 vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1366                        slp_instance instance, unsigned nunits)
1367 {
1368   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1369   unsigned ncopies_for_cost;
1370   stmt_info_for_cost *si;
1371   unsigned i;
1372
1373   /* Calculate the number of vector stmts to create based on the unrolling
1374      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1375      GROUP_SIZE / NUNITS otherwise).  */
1376   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1377   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1378
1379   prologue_cost_vec.create (10);
1380   body_cost_vec.create (10);
1381   SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1382   vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1383                            instance, SLP_INSTANCE_TREE (instance),
1384                            &prologue_cost_vec, ncopies_for_cost);
1385
1386   /* Record the prologue costs, which were delayed until we were
1387      sure that SLP was successful.  Unlike the body costs, we know
1388      the final values now regardless of the loop vectorization factor.  */
1389   void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1390                 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1391   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1392     {
1393       struct _stmt_vec_info *stmt_info
1394         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1395       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1396                             si->misalign, vect_prologue);
1397     }
1398
1399   prologue_cost_vec.release ();
1400 }
1401
1402 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1403    vect_build_slp_tree to build a tree of packed stmts if possible.
1404    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1405
1406 static bool
1407 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1408                            gimple stmt)
1409 {
1410   slp_instance new_instance;
1411   slp_tree node;
1412   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1413   unsigned int unrolling_factor = 1, nunits;
1414   tree vectype, scalar_type = NULL_TREE;
1415   gimple next;
1416   unsigned int vectorization_factor = 0;
1417   int i;
1418   unsigned int max_nunits = 0;
1419   vec<slp_tree> loads;
1420   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1421   vec<gimple> scalar_stmts;
1422
1423   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1424     {
1425       if (dr)
1426         {
1427           scalar_type = TREE_TYPE (DR_REF (dr));
1428           vectype = get_vectype_for_scalar_type (scalar_type);
1429         }
1430       else
1431         {
1432           gcc_assert (loop_vinfo);
1433           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1434         }
1435
1436       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1437     }
1438   else
1439     {
1440       gcc_assert (loop_vinfo);
1441       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1442       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1443     }
1444
1445   if (!vectype)
1446     {
1447       if (dump_enabled_p ())
1448         {
1449           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1450                            "Build SLP failed: unsupported data-type ");
1451           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1452         }
1453
1454       return false;
1455     }
1456
1457   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1458   if (loop_vinfo)
1459     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1460   else
1461     vectorization_factor = nunits;
1462
1463   /* Calculate the unrolling factor.  */
1464   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1465   if (unrolling_factor != 1 && !loop_vinfo)
1466     {
1467       if (dump_enabled_p ())
1468         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1469                          "Build SLP failed: unrolling required in basic"
1470                          " block SLP");
1471
1472       return false;
1473     }
1474
1475   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1476   scalar_stmts.create (group_size);
1477   next = stmt;
1478   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1479     {
1480       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1481       while (next)
1482         {
1483           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1484               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1485             scalar_stmts.safe_push (
1486                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1487           else
1488             scalar_stmts.safe_push (next);
1489           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1490         }
1491     }
1492   else
1493     {
1494       /* Collect reduction statements.  */
1495       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1496       for (i = 0; reductions.iterate (i, &next); i++)
1497         scalar_stmts.safe_push (next);
1498     }
1499
1500   node = vect_create_new_slp_node (scalar_stmts);
1501
1502   loads.create (group_size);
1503
1504   /* Build the tree for the SLP instance.  */
1505   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1506                            &max_nunits, &loads,
1507                            vectorization_factor, NULL, NULL))
1508     {
1509       /* Calculate the unrolling factor based on the smallest type.  */
1510       if (max_nunits > nunits)
1511         unrolling_factor = least_common_multiple (max_nunits, group_size)
1512                            / group_size;
1513
1514       if (unrolling_factor != 1 && !loop_vinfo)
1515         {
1516           if (dump_enabled_p ())
1517             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1518                              "Build SLP failed: unrolling required in basic"
1519                              " block SLP");
1520           vect_free_slp_tree (node);
1521           loads.release ();
1522           return false;
1523         }
1524
1525       /* Create a new SLP instance.  */
1526       new_instance = XNEW (struct _slp_instance);
1527       SLP_INSTANCE_TREE (new_instance) = node;
1528       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1529       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1530       SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1531       SLP_INSTANCE_LOADS (new_instance) = loads;
1532       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1533
1534       /* Compute the load permutation.  */
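           /* A load node needs a permutation if its scalar loads do not
              appear in interleaving-chain order; record one index per
              load.  */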
1535       slp_tree load_node;
1536       bool loads_permuted = false;
1537       FOR_EACH_VEC_ELT (loads, i, load_node)
1538         {
1539           vec<unsigned> load_permutation;
1540           int j;
1541           gimple load, first_stmt;
1542           bool this_load_permuted = false;
1543           load_permutation.create (group_size);
1544           first_stmt = GROUP_FIRST_ELEMENT
1545               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1546           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1547             {
1548               int load_place
1549                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1550               gcc_assert (load_place != -1);
1551               if (load_place != j)
1552                 this_load_permuted = true;
1553               load_permutation.safe_push (load_place);
1554             }
1555           if (!this_load_permuted)
1556             {
1557               load_permutation.release ();
1558               continue;
1559             }
1560           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1561           loads_permuted = true;
1562         }
1563
1564       if (loads_permuted)
1565         {
1566           if (!vect_supported_load_permutation_p (new_instance))
1567             {
1568               if (dump_enabled_p ())
1569                 {
1570                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1571                                    "Build SLP failed: unsupported load "
1572                                    "permutation ");
1573                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1574                 }
1575               vect_free_slp_instance (new_instance);
1576               return false;
1577             }
1578
1579           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1580             = vect_find_first_load_in_slp_instance (new_instance);
1581         }
1582
1583       /* Compute the costs of this SLP instance.  */
1584       vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1585                              new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1586
1587       if (loop_vinfo)
1588         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1589       else
1590         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1591
1592       if (dump_enabled_p ())
1593         vect_print_slp_tree (MSG_NOTE, node);
1594
1595       return true;
1596     }
1597
1598   /* Failed to SLP.  */
1599   /* Free the allocated memory.  */
1600   vect_free_slp_tree (node);
1601   loads.release ();
1602
1603   return false;
1604 }
1605
1606
1607 /* Check if there are stmts in the loop that can be vectorized using SLP.
1608    Build SLP trees of packed scalar stmts if SLP is possible.  */
1609
1610 bool
1611 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1612 {
1613   unsigned int i;
1614   vec<gimple> grouped_stores;
1615   vec<gimple> reductions = vNULL;
1616   vec<gimple> reduc_chains = vNULL;
1617   gimple first_element;
1618   bool ok = false;
1619
1620   if (dump_enabled_p ())
1621     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
1622
1623   if (loop_vinfo)
1624     {
1625       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1626       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1627       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1628     }
1629   else
1630     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1631
1632   /* Find SLP sequences starting from groups of grouped stores.  */
1633   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1634     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1635       ok = true;
1636
1637   if (bb_vinfo && !ok)
1638     {
1639       if (dump_enabled_p ())
1640         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1641                          "Failed to SLP the basic block.");
1642
1643       return false;
1644     }
1645
1646   if (loop_vinfo
1647       && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1648     {
1649       /* Find SLP sequences starting from reduction chains.  */
1650       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1651         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1652           ok = true;
1653         else
1654           return false;
1655
1656       /* Don't try to vectorize SLP reductions if a reduction chain was
1657          detected.  */
1658       return ok;
1659     }
1660
1661   /* Find SLP sequences starting from groups of reductions.  */
1662   if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1663       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1664     ok = true;
1665
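       /* Failure to build an SLP instance from plain reductions is not
          fatal, so report success in any case.  */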
1666   return true;
1667 }
1668
1669
1670 /* For each possible SLP instance decide whether to SLP it and calculate the
1671    overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
1672    to SLP at least one instance.  */
1673
1674 bool
1675 vect_make_slp_decision (loop_vec_info loop_vinfo)
1676 {
1677   unsigned int i, unrolling_factor = 1;
1678   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1679   slp_instance instance;
1680   int decided_to_slp = 0;
1681
1682   if (dump_enabled_p ())
1683     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1684
1685   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1686     {
1687       /* FORNOW: SLP if you can.  */
1688       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1689         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1690
1691       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1692          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1693          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1694       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1695       decided_to_slp++;
1696     }
1697
1698   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1699
1700   if (decided_to_slp && dump_enabled_p ())
1701     dump_printf_loc (MSG_NOTE, vect_location,
1702                      "Decided to SLP %d instances. Unrolling factor %d",
1703                      decided_to_slp, unrolling_factor);
1704
1705   return (decided_to_slp > 0);
1706 }
1707
1708
1709 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1710    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
1711
1712 static void
1713 vect_detect_hybrid_slp_stmts (slp_tree node)
1714 {
1715   int i;
1716   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1717   gimple stmt = stmts[0];
1718   imm_use_iterator imm_iter;
1719   gimple use_stmt;
1720   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1721   slp_tree child;
1722   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1723   struct loop *loop = NULL;
1724   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1725   basic_block bb = NULL;
1726
1727   if (!node)
1728     return;
1729
1730   if (loop_vinfo)
1731     loop = LOOP_VINFO_LOOP (loop_vinfo);
1732   else
1733     bb = BB_VINFO_BB (bb_vinfo);
1734
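       /* A pure-SLP stmt whose def is used by a relevant stmt that does not
          belong to any SLP instance (in the same loop or basic block) must
          also be vectorized by loop-based vectorization; mark such stmts
          as hybrid.  */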
1735   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1736     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1737         && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1738       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1739         if (gimple_bb (use_stmt)
1740             && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1741                  || bb == gimple_bb (use_stmt))
1742             && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1743             && !STMT_SLP_TYPE (stmt_vinfo)
1744             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1745                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1746             && !(gimple_code (use_stmt) == GIMPLE_PHI
1747                  && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1748                   == vect_reduction_def))
1749           vect_mark_slp_stmts (node, hybrid, i);
1750
1751   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1752     vect_detect_hybrid_slp_stmts (child);
1753 }
1754
1755
1756 /* Find stmts that must be both vectorized and SLPed.  */
1757
1758 void
1759 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1760 {
1761   unsigned int i;
1762   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1763   slp_instance instance;
1764
1765   if (dump_enabled_p ())
1766     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1767
1768   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1769     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1770 }
1771
1772
1773 /* Create and initialize a new bb_vec_info struct for BB, as well as
1774    stmt_vec_info structs for all the stmts in it.  */
1775
1776 static bb_vec_info
1777 new_bb_vec_info (basic_block bb)
1778 {
1779   bb_vec_info res = NULL;
1780   gimple_stmt_iterator gsi;
1781
1782   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1783   BB_VINFO_BB (res) = bb;
1784
1785   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1786     {
1787       gimple stmt = gsi_stmt (gsi);
1788       gimple_set_uid (stmt, 0);
1789       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1790     }
1791
1792   BB_VINFO_GROUPED_STORES (res).create (10);
1793   BB_VINFO_SLP_INSTANCES (res).create (2);
1794   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1795
1796   bb->aux = res;
1797   return res;
1798 }
1799
1800
1801 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1802    stmts in the basic block.  */
1803
1804 static void
1805 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1806 {
1807   vec<slp_instance> slp_instances;
1808   slp_instance instance;
1809   basic_block bb;
1810   gimple_stmt_iterator si;
1811   unsigned i;
1812
1813   if (!bb_vinfo)
1814     return;
1815
1816   bb = BB_VINFO_BB (bb_vinfo);
1817
1818   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1819     {
1820       gimple stmt = gsi_stmt (si);
1821       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1822
1823       if (stmt_info)
1824         /* Free stmt_vec_info.  */
1825         free_stmt_vec_info (stmt);
1826     }
1827
1828   vect_destroy_datarefs (NULL, bb_vinfo);
1829   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1830   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1831   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1832   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1833     vect_free_slp_instance (instance);
1834   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1835   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1836   free (bb_vinfo);
1837   bb->aux = NULL;
1838 }
1839
1840
1841 /* Analyze statements contained in SLP tree node after recursively analyzing
1842    the subtree. Return TRUE if the operations are supported.  */
1843
1844 static bool
1845 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1846 {
1847   bool dummy;
1848   int i;
1849   gimple stmt;
1850   slp_tree child;
1851
1852   if (!node)
1853     return true;
1854
1855   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1856     if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1857       return false;
1858
1859   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1860     {
1861       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1862       gcc_assert (stmt_info);
1863       gcc_assert (PURE_SLP_STMT (stmt_info));
1864
1865       if (!vect_analyze_stmt (stmt, &dummy, node))
1866         return false;
1867     }
1868
1869   return true;
1870 }
1871
1872
1873 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
1874    operations are supported. */
1875
1876 static bool
1877 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1878 {
1879   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1880   slp_instance instance;
1881   int i;
1882
1883   for (i = 0; slp_instances.iterate (i, &instance); )
1884     {
1885       if (!vect_slp_analyze_node_operations (bb_vinfo,
1886                                              SLP_INSTANCE_TREE (instance)))
1887         {
1888           vect_free_slp_instance (instance);
1889           slp_instances.ordered_remove (i);
1890         }
1891       else
1892         i++;
1893     }
1894
1895   if (!slp_instances.length ())
1896     return false;
1897
1898   return true;
1899 }
1900
1901
1902 /* Compute the scalar cost of the SLP node NODE and its children
1903    and return it.  Do not account stmts that are marked in LIFE,
1904    and update LIFE according to uses of NODE.  */
1905
1906 static unsigned
1907 vect_bb_slp_scalar_cost (basic_block bb,
1908                          slp_tree node, vec<bool, va_stack> life)
1909 {
1910   unsigned scalar_cost = 0;
1911   unsigned i;
1912   gimple stmt;
1913   slp_tree child;
1914
1915   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1916     {
1917       unsigned stmt_cost;
1918       ssa_op_iter op_iter;
1919       def_operand_p def_p;
1920       stmt_vec_info stmt_info;
1921
1922       if (life[i])
1923         continue;
1924
1925       /* If there is a non-vectorized use of the defs then the scalar
1926          stmt is kept live in which case we do not account it or any
1927          required defs in the SLP children in the scalar cost.  This
1928          way we make the vectorization more costly when compared to
1929          the scalar cost.  */
1930       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
1931         {
1932           imm_use_iterator use_iter;
1933           gimple use_stmt;
1934           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
1935             if (gimple_code (use_stmt) == GIMPLE_PHI
1936                 || gimple_bb (use_stmt) != bb
1937                 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
1938               {
1939                 life[i] = true;
1940                 BREAK_FROM_IMM_USE_STMT (use_iter);
1941               }
1942         }
1943       if (life[i])
1944         continue;
1945
1946       stmt_info = vinfo_for_stmt (stmt);
1947       if (STMT_VINFO_DATA_REF (stmt_info))
1948         {
1949           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1950             stmt_cost = vect_get_stmt_cost (scalar_load);
1951           else
1952             stmt_cost = vect_get_stmt_cost (scalar_store);
1953         }
1954       else
1955         stmt_cost = vect_get_stmt_cost (scalar_stmt);
1956
1957       scalar_cost += stmt_cost;
1958     }
1959
1960   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1961     scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
1962
1963   return scalar_cost;
1964 }
1965
1966 /* Check if vectorization of the basic block is profitable.  */
1967
1968 static bool
1969 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1970 {
1971   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1972   slp_instance instance;
1973   int i, j;
1974   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
1975   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
1976   void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
1977   stmt_vec_info stmt_info = NULL;
1978   stmt_vector_for_cost body_cost_vec;
1979   stmt_info_for_cost *ci;
1980
1981   /* Calculate vector costs.  */
1982   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1983     {
1984       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
1985
1986       FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
1987         {
1988           stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
1989           (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
1990                                 stmt_info, ci->misalign, vect_body);
1991         }
1992     }
1993
1994   /* Calculate scalar cost.  */
1995   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1996     {
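           /* LIFE[I] is set when the I-th scalar stmt must be kept live
              because one of its defs is used by a stmt that will not be
              vectorized; such stmts are not counted in the scalar cost.  */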
1997       vec<bool, va_stack> life;
1998       vec_stack_alloc (bool, life, SLP_INSTANCE_GROUP_SIZE (instance));
1999       life.quick_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2000       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2001                                               SLP_INSTANCE_TREE (instance),
2002                                               life);
2003       life.release ();
2004     }
2005
2006   /* Complete the target-specific cost calculation.  */
2007   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2008                &vec_inside_cost, &vec_epilogue_cost);
2009
2010   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2011
2012   if (dump_enabled_p ())
2013     {
2014       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2015       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2016                    vec_inside_cost);
2017       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2018       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2019       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d", scalar_cost);
2020     }
2021
2022   /* Vectorization is profitable if its cost is less than the cost of the
2023      scalar version.  */
2024   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2025     return false;
2026
2027   return true;
2028 }
2029
2030 /* Check if the basic block can be vectorized.  */
2031
2032 static bb_vec_info
2033 vect_slp_analyze_bb_1 (basic_block bb)
2034 {
2035   bb_vec_info bb_vinfo;
2036   vec<slp_instance> slp_instances;
2037   slp_instance instance;
2038   int i;
2039   int min_vf = 2;
2040
2041   bb_vinfo = new_bb_vec_info (bb);
2042   if (!bb_vinfo)
2043     return NULL;
2044
2045   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2046     {
2047       if (dump_enabled_p ())
2048         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2049                          "not vectorized: unhandled data-ref in basic "
2050                          "block.\n");
2051
2052       destroy_bb_vec_info (bb_vinfo);
2053       return NULL;
2054     }
2055
2056   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2057     {
2058       if (dump_enabled_p ())
2059         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2060                          "not vectorized: not enough data-refs in "
2061                          "basic block.\n");
2062
2063       destroy_bb_vec_info (bb_vinfo);
2064       return NULL;
2065     }
2066
2067   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2068     {
2069       if (dump_enabled_p ())
2070         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2071                          "not vectorized: unhandled data access in "
2072                          "basic block.\n");
2073
2074       destroy_bb_vec_info (bb_vinfo);
2075       return NULL;
2076     }
2077
2078   vect_pattern_recog (NULL, bb_vinfo);
2079
2080   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2081     {
2082       if (dump_enabled_p ())
2083         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2084                          "not vectorized: unhandled data dependence "
2085                          "in basic block.\n");
2086
2087       destroy_bb_vec_info (bb_vinfo);
2088       return NULL;
2089     }
2090
2091   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2092     {
2093       if (dump_enabled_p ())
2094         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2095                          "not vectorized: bad data alignment in basic "
2096                          "block.\n");
2097
2098       destroy_bb_vec_info (bb_vinfo);
2099       return NULL;
2100     }
2101
2102   /* Check the SLP opportunities in the basic block, analyze and build SLP
2103      trees.  */
2104   if (!vect_analyze_slp (NULL, bb_vinfo))
2105     {
2106       if (dump_enabled_p ())
2107         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2108                          "not vectorized: failed to find SLP opportunities "
2109                          "in basic block.\n");
2110
2111       destroy_bb_vec_info (bb_vinfo);
2112       return NULL;
2113     }
2114
2115   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2116
2117   /* Mark all the statements that we want to vectorize as pure SLP and
2118      relevant.  */
2119   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2120     {
2121       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2122       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2123     }
2124
2125   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2126     {
2127       if (dump_enabled_p ())
2128         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2129                          "not vectorized: unsupported alignment in basic "
2130                          "block.\n");
2131       destroy_bb_vec_info (bb_vinfo);
2132       return NULL;
2133     }
2134
2135   if (!vect_slp_analyze_operations (bb_vinfo))
2136     {
2137       if (dump_enabled_p ())
2138         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2139                          "not vectorized: bad operation in basic block.\n");
2140
2141       destroy_bb_vec_info (bb_vinfo);
2142       return NULL;
2143     }
2144
2145   /* Cost model: check if the vectorization is worthwhile.  */
2146   if (flag_vect_cost_model
2147       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2148     {
2149       if (dump_enabled_p ())
2150         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2151                          "not vectorized: vectorization is not "
2152                          "profitable.\n");
2153
2154       destroy_bb_vec_info (bb_vinfo);
2155       return NULL;
2156     }
2157
2158   if (dump_enabled_p ())
2159     dump_printf_loc (MSG_NOTE, vect_location,
2160                      "Basic block will be vectorized using SLP\n");
2161
2162   return bb_vinfo;
2163 }
2164
2165
2166 bb_vec_info
2167 vect_slp_analyze_bb (basic_block bb)
2168 {
2169   bb_vec_info bb_vinfo;
2170   int insns = 0;
2171   gimple_stmt_iterator gsi;
2172   unsigned int vector_sizes;
2173
2174   if (dump_enabled_p ())
2175     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2176
2177   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2178     {
2179       gimple stmt = gsi_stmt (gsi);
2180       if (!is_gimple_debug (stmt)
2181           && !gimple_nop_p (stmt)
2182           && gimple_code (stmt) != GIMPLE_LABEL)
2183         insns++;
2184     }
2185
2186   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2187     {
2188       if (dump_enabled_p ())
2189         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2190                          "not vectorized: too many instructions in "
2191                          "basic block.\n");
2192
2193       return NULL;
2194     }
2195
2196   /* Autodetect the first vector size we try.  */
2197   current_vector_size = 0;
2198   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2199
2200   while (1)
2201     {
2202       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2203       if (bb_vinfo)
2204         return bb_vinfo;
2205
2206       destroy_bb_vec_info (bb_vinfo);
2207
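           /* VECTOR_SIZES is a bitmask of the vector sizes the target
              supports.  E.g., with 32-byte and 16-byte vectors available
              (illustrative sizes), a failed attempt at 32 bytes clears
              that bit and the next iteration retries with 16 bytes.  */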
2208       vector_sizes &= ~current_vector_size;
2209       if (vector_sizes == 0
2210           || current_vector_size == 0)
2211         return NULL;
2212
2213       /* Try the next biggest vector size.  */
2214       current_vector_size = 1 << floor_log2 (vector_sizes);
2215       if (dump_enabled_p ())
2216         dump_printf_loc (MSG_NOTE, vect_location,
2217                          "***** Re-trying analysis with "
2218                          "vector size %d\n", current_vector_size);
2219     }
2220 }
2221
2222
2223 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2224    the number of created vector stmts depends on the unrolling factor).
2225    However, the actual number of vector stmts for every SLP node depends on
2226    VF, which is only known later during loop analysis.  Hence, SLP costs
2227    should be updated.  In this function we assume that the inside costs
2228    calculated in vect_model_xxx_cost are linear in ncopies.  */
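     /* Illustration only: with SLP_INSTANCE_UNROLLING_FACTOR == 2 and a
        final VF of 8, ncopies == 8 / 2 == 4 below, so every cost recorded
        for the instance body is counted four times.  */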
2229
2230 void
2231 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2232 {
2233   unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2234   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2235   slp_instance instance;
2236   stmt_vector_for_cost body_cost_vec;
2237   stmt_info_for_cost *si;
2238   void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2239
2240   if (dump_enabled_p ())
2241     dump_printf_loc (MSG_NOTE, vect_location,
2242                      "=== vect_update_slp_costs_according_to_vf ===");
2243
2244   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2245     {
2246       /* We assume that costs are linear in ncopies.  */
2247       int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2248
2249       /* Record the instance's instructions in the target cost model.
2250          This was delayed until here because the count of instructions
2251          isn't known beforehand.  */
2252       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2253
2254       FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2255         (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2256                               vinfo_for_stmt (si->stmt), si->misalign,
2257                               vect_body);
2258     }
2259 }
2260
2261
2262 /* For constant and loop invariant defs of SLP_NODE this function returns
2263    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2264    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2265    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2266    REDUC_INDEX is the index of the reduction operand in the statements, unless
2267    it is -1.  */
2268
2269 static void
2270 vect_get_constant_vectors (tree op, slp_tree slp_node,
2271                            vec<tree> *vec_oprnds,
2272                            unsigned int op_num, unsigned int number_of_vectors,
2273                            int reduc_index)
2274 {
2275   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2276   gimple stmt = stmts[0];
2277   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2278   unsigned nunits;
2279   tree vec_cst;
2280   tree *elts;
2281   unsigned j, number_of_places_left_in_vector;
2282   tree vector_type;
2283   tree vop;
2284   int group_size = stmts.length ();
2285   unsigned int vec_num, i;
2286   unsigned number_of_copies = 1;
2287   vec<tree> voprnds;
2288   voprnds.create (number_of_vectors);
2289   bool constant_p, is_store;
2290   tree neutral_op = NULL;
2291   enum tree_code code = gimple_expr_code (stmt);
2292   gimple def_stmt;
2293   struct loop *loop;
2294   gimple_seq ctor_seq = NULL;
2295
2296   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2297       && reduc_index != -1)
2298     {
2299       op_num = reduc_index - 1;
2300       op = gimple_op (stmt, reduc_index);
2301       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2302          we need either neutral operands or the original operands.  See
2303          get_initial_def_for_reduction() for details.  */
2304       switch (code)
2305         {
2306           case WIDEN_SUM_EXPR:
2307           case DOT_PROD_EXPR:
2308           case PLUS_EXPR:
2309           case MINUS_EXPR:
2310           case BIT_IOR_EXPR:
2311           case BIT_XOR_EXPR:
2312              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2313                neutral_op = build_real (TREE_TYPE (op), dconst0);
2314              else
2315                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2316
2317              break;
2318
2319           case MULT_EXPR:
2320              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2321                neutral_op = build_real (TREE_TYPE (op), dconst1);
2322              else
2323                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2324
2325              break;
2326
2327           case BIT_AND_EXPR:
2328             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2329             break;
2330
2331           case MAX_EXPR:
2332           case MIN_EXPR:
2333             def_stmt = SSA_NAME_DEF_STMT (op);
2334             loop = (gimple_bb (stmt))->loop_father;
2335             neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2336                                                 loop_preheader_edge (loop));
2337             break;
2338
2339           default:
2340             neutral_op = NULL;
2341         }
2342     }
2343
2344   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2345     {
2346       is_store = true;
2347       op = gimple_assign_rhs1 (stmt);
2348     }
2349   else
2350     is_store = false;
2351
2352   gcc_assert (op);
2353
2354   if (CONSTANT_CLASS_P (op))
2355     constant_p = true;
2356   else
2357     constant_p = false;
2358
2359   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2360   gcc_assert (vector_type);
2361   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2362
2363   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2364      created vectors. It is greater than 1 if unrolling is performed.
2365
2366      For example, we have two scalar operands, s1 and s2 (e.g., group of
2367      strided accesses of size two), while NUNITS is four (i.e., four scalars
2368      of this type can be packed in a vector).  The output vector will contain
2369      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2370      will be 2).
2371
2372      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2373      containing the operands.
2374
2375      For example, NUNITS is four as before, and the group size is 8
2376      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2377      {s5, s6, s7, s8}.  */
2378
2379   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2380
2381   number_of_places_left_in_vector = nunits;
2382   elts = XALLOCAVEC (tree, nunits);
2383   for (j = 0; j < number_of_copies; j++)
2384     {
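           /* Walk the scalar stmts in reverse so that the last stmt of the
              group lands in the highest vector lane; the finished vectors
              are inverted again below.  The unsigned index I wrapping past
              zero makes stmts.iterate fail and terminates the loop.  */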
2385       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2386         {
2387           if (is_store)
2388             op = gimple_assign_rhs1 (stmt);
2389           else
2390             {
2391               switch (code)
2392                 {
2393                   case COND_EXPR:
2394                     if (op_num == 0 || op_num == 1)
2395                       {
2396                         tree cond = gimple_assign_rhs1 (stmt);
2397                         op = TREE_OPERAND (cond, op_num);
2398                       }
2399                     else
2400                       {
2401                         if (op_num == 2)
2402                           op = gimple_assign_rhs2 (stmt);
2403                         else
2404                           op = gimple_assign_rhs3 (stmt);
2405                       }
2406                     break;
2407
2408                   case CALL_EXPR:
2409                     op = gimple_call_arg (stmt, op_num);
2410                     break;
2411
2412                   case LSHIFT_EXPR:
2413                   case RSHIFT_EXPR:
2414                   case LROTATE_EXPR:
2415                   case RROTATE_EXPR:
2416                     op = gimple_op (stmt, op_num + 1);
2417                     /* Unlike the other binary operators, shifts/rotates
2418                        take an int shift count rather than one of the same
2419                        type as the lhs, so make sure the scalar constant is
2420                        the right type when dealing with vectors of
2421                        long long/long/short/char.  */
2422                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2423                       op = fold_convert (TREE_TYPE (vector_type), op);
2424                     break;
2425
2426                   default:
2427                     op = gimple_op (stmt, op_num + 1);
2428                     break;
2429                 }
2430             }
2431
2432           if (reduc_index != -1)
2433             {
2434               loop = (gimple_bb (stmt))->loop_father;
2435               def_stmt = SSA_NAME_DEF_STMT (op);
2436
2437               gcc_assert (loop);
2438
2439               /* Get the def before the loop.  In reduction chain we have only
2440                  one initial value.  */
2441               if ((j != (number_of_copies - 1)
2442                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2443                        && i != 0))
2444                   && neutral_op)
2445                 op = neutral_op;
2446               else
2447                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2448                                             loop_preheader_edge (loop));
2449             }
2450
2451           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2452           number_of_places_left_in_vector--;
2453           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2454             {
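                   /* OP's scalar type differs from the vector element type,
                      so reinterpret its bits with a VIEW_CONVERT_EXPR.  For
                      non-constants this emits a conversion stmt that is
                      collected in CTOR_SEQ and inserted later.  */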
2455               if (CONSTANT_CLASS_P (op))
2456                 {
2457                   op = fold_unary (VIEW_CONVERT_EXPR,
2458                                    TREE_TYPE (vector_type), op);
2459                   gcc_assert (op && CONSTANT_CLASS_P (op));
2460                 }
2461               else
2462                 {
2463                   tree new_temp
2464                     = make_ssa_name (TREE_TYPE (vector_type), NULL);
2465                   gimple init_stmt;
2466                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2467                                op);               
2468                   init_stmt
2469                     = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2470                                                     new_temp, op, NULL_TREE);
2471                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2472                   op = new_temp;
2473                 }
2474             }
2475           elts[number_of_places_left_in_vector] = op;
2476           if (!CONSTANT_CLASS_P (op))
2477             constant_p = false;
2478
2479           if (number_of_places_left_in_vector == 0)
2480             {
2481               number_of_places_left_in_vector = nunits;
2482
2483               if (constant_p)
2484                 vec_cst = build_vector (vector_type, elts);
2485               else
2486                 {
2487                   vec<constructor_elt, va_gc> *v;
2488                   unsigned k;
2489                   vec_alloc (v, nunits);
2490                   for (k = 0; k < nunits; ++k)
2491                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2492                   vec_cst = build_constructor (vector_type, v);
2493                 }
2494               voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2495                                                     vector_type, NULL));
2496               if (ctor_seq != NULL)
2497                 {
2498                   gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2499                   gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2500                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2501                                                         GSI_SAME_STMT);
2502                   ctor_seq = NULL;
2503                 }
2504             }
2505         }
2506     }
2507
2508   /* Since the vectors were created in reverse order, invert them to
2509      restore the original stmt order.  */
2510   vec_num = voprnds.length ();
2511   for (j = vec_num; j != 0; j--)
2512     {
2513       vop = voprnds[j - 1];
2514       vec_oprnds->quick_push (vop);
2515     }
2516
2517   voprnds.release ();
2518
2519   /* If VF is greater than the unrolling factor needed for the SLP group
2520      of stmts, the NUMBER_OF_VECTORS to be created exceeds
2521      NUMBER_OF_SCALARS/NUNITS (or NUNITS/NUMBER_OF_SCALARS), so the
2522      vectors created above have to be replicated.  */
2523   while (number_of_vectors > vec_oprnds->length ())
2524     {
2525       tree neutral_vec = NULL;
2526
2527       if (neutral_op)
2528         {
2529           if (!neutral_vec)
2530             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2531
2532           vec_oprnds->quick_push (neutral_vec);
2533         }
2534       else
2535         {
2536           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2537             vec_oprnds->quick_push (vop);
2538         }
2539     }
2540 }
2541
2542
2543 /* Get the vectorized definitions from SLP_NODE, which already contains
2544    the corresponding vectorized def-stmts.  */
2545
2546 static void
2547 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2548 {
2549   tree vec_oprnd;
2550   gimple vec_def_stmt;
2551   unsigned int i;
2552
2553   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2554
2555   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2556     {
2557       gcc_assert (vec_def_stmt);
2558       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2559       vec_oprnds->quick_push (vec_oprnd);
2560     }
2561 }
2562
2563
2564 /* Get vectorized definitions for SLP_NODE.
2565    If the scalar definitions are loop invariants or constants, collect them and
2566    call vect_get_constant_vectors() to create vector stmts.
2567    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2568    must be stored in the corresponding child of SLP_NODE, and we call
2569    vect_get_slp_vect_defs () to retrieve them.  */
2570
2571 void
2572 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2573                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2574 {
2575   gimple first_stmt;
2576   int number_of_vects = 0, i;
2577   unsigned int child_index = 0;
2578   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2579   slp_tree child = NULL;
2580   vec<tree> vec_defs;
2581   tree oprnd;
2582   bool vectorized_defs;
2583
2584   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2585   FOR_EACH_VEC_ELT (ops, i, oprnd)
2586     {
2587       /* For each operand we check if it has vectorized definitions in a child
2588          node or we need to create them (for invariants and constants).  We
2589          check if the LHS of the first stmt of the next child matches OPRND.
2590          If it does, we found the correct child.  Otherwise, we call
2591          vect_get_constant_vectors () and do not advance CHILD_INDEX, so
2592          that this child node is checked again for the next operand.  */
2593       vectorized_defs = false;
2594       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2595         {
2596           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2597
2598           /* We have to check both pattern and original def, if available.  */
2599           gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2600           gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2601
2602           if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2603               || (related
2604                   && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2605             {
2606               /* The number of vector defs is determined by the number of
2607                  vector statements in the node from which we get those
2608                  statements.  */
2609               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2610               vectorized_defs = true;
2611               child_index++;
2612             }
2613         }
2614
2615       if (!vectorized_defs)
2616         {
2617           if (i == 0)
2618             {
2619               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2620               /* Number of vector stmts was calculated according to LHS in
2621                  vect_schedule_slp_instance (), fix it by replacing LHS with
2622                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2623                  details.  */
2624               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2625                                              &rhs_size_unit);
2626               if (rhs_size_unit != lhs_size_unit)
2627                 {
2628                   number_of_vects *= rhs_size_unit;
2629                   number_of_vects /= lhs_size_unit;
2630                 }
2631             }
2632         }
2633
2634       /* Allocate memory for vectorized defs.  */
2635       vec_defs = vNULL;
2636       vec_defs.create (number_of_vects);
2637
2638       /* For reduction defs we call vect_get_constant_vectors (), since we are
2639          looking for initial loop invariant values.  */
2640       if (vectorized_defs && reduc_index == -1)
2641         /* The defs are already vectorized.  */
2642         vect_get_slp_vect_defs (child, &vec_defs);
2643       else
2644         /* Build vectors from scalar defs.  */
2645         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2646                                    number_of_vects, reduc_index);
2647
2648       vec_oprnds->quick_push (vec_defs);
2649
2650       /* For reductions, we only need initial values.  */
2651       if (reduc_index != -1)
2652         return;
2653     }
2654 }
2655
2656
2657 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2658    building a vector of type MASK_TYPE from it) and two input vectors placed in
2659    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2660    shifting by STRIDE elements of DR_CHAIN for every copy.
2661    (STRIDE is the number of vectorized stmts for NODE divided by the number of
2662    copies).
2663    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2664    the created stmts must be inserted.  */
2665
2666 static inline void
2667 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2668                            tree mask, int first_vec_indx, int second_vec_indx,
2669                            gimple_stmt_iterator *gsi, slp_tree node,
2670                            tree vectype, vec<tree> dr_chain,
2671                            int ncopies, int vect_stmts_counter)
2672 {
2673   tree perm_dest;
2674   gimple perm_stmt = NULL;
2675   stmt_vec_info next_stmt_info;
2676   int i, stride;
2677   tree first_vec, second_vec, data_ref;
2678
2679   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2680
2681   /* Initialize the vect stmts of NODE to properly insert the generated
2682      stmts later.  */
2683   for (i = SLP_TREE_VEC_STMTS (node).length ();
2684        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2685     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2686
2687   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2688   for (i = 0; i < ncopies; i++)
2689     {
2690       first_vec = dr_chain[first_vec_indx];
2691       second_vec = dr_chain[second_vec_indx];
2692
2693       /* Generate the permute statement.  */
2694       perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2695                                                 first_vec, second_vec, mask);
2696       data_ref = make_ssa_name (perm_dest, perm_stmt);
2697       gimple_set_lhs (perm_stmt, data_ref);
2698       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2699
2700       /* Store the vector statement in NODE.  */
2701       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2702
2703       first_vec_indx += stride;
2704       second_vec_indx += stride;
2705     }
2706
2707   /* Mark the scalar stmt as vectorized.  */
2708   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2709   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2710 }
2711
2712
2713 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2714    return in CURRENT_MASK_ELEMENT its equivalent in target specific
2715    representation.  Check that the mask is valid and return FALSE if not.
2716    Set *NEED_NEXT_VECTOR to TRUE if the permutation requires moving to
2717    the next vector, i.e., the current first vector is not needed.  */
2718
2719 static bool
2720 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2721                        int mask_nunits, bool only_one_vec, int index,
2722                        unsigned char *mask, int *current_mask_element,
2723                        bool *need_next_vector, int *number_of_mask_fixes,
2724                        bool *mask_fixed, bool *needs_first_vector)
2725 {
2726   int i;
2727
2728   /* Convert to target specific representation.  */
2729   *current_mask_element = first_mask_element + m;
2730   /* Adjust the value in case it's a mask for second and third vectors.  */
2731   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2732
2733   if (*current_mask_element < mask_nunits)
2734     *needs_first_vector = true;
2735
2736   /* We have only one input vector to permute but the mask accesses values in
2737      the next vector as well.  */
2738   if (only_one_vec && *current_mask_element >= mask_nunits)
2739     {
2740       if (dump_enabled_p ())
2741         {
2742           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2743                            "permutation requires at least two vectors ");
2744           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2745         }
2746
2747       return false;
2748     }
2749
2750   /* The mask requires the next vector.  */
2751   if (*current_mask_element >= mask_nunits * 2)
2752     {
2753       if (*needs_first_vector || *mask_fixed)
2754         {
2755           /* We either need the first vector too or have already moved to the
2756              next vector. In both cases, this permutation needs three
2757              vectors.  */
2758           if (dump_enabled_p ())
2759             {
2760               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2761                                "permutation requires at "
2762                                "least three vectors ");
2763               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2764             }
2765
2766           return false;
2767         }
2768
2769       /* We move to the next vector, dropping the first one and working with
2770          the second and the third - we need to adjust the values of the mask
2771          accordingly.  */
2772       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2773
2774       for (i = 0; i < index; i++)
2775         mask[i] -= mask_nunits * *number_of_mask_fixes;
2776
2777       (*number_of_mask_fixes)++;
2778       *mask_fixed = true;
2779     }
2780
2781   *need_next_vector = *mask_fixed;
2782
2783   /* This was the last element of this mask. Start a new one.  */
2784   if (index == mask_nunits - 1)
2785     {
2786       *number_of_mask_fixes = 1;
2787       *mask_fixed = false;
2788       *needs_first_vector = false;
2789     }
2790
2791   return true;
2792 }
2793
2794
2795 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2796    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2797    permute statements for the SLP node NODE of the SLP instance
2798    SLP_NODE_INSTANCE.  */
2799
2800 bool
2801 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2802                               gimple_stmt_iterator *gsi, int vf,
2803                               slp_instance slp_node_instance, bool analyze_only)
2804 {
2805   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2806   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2807   tree mask_element_type = NULL_TREE, mask_type;
2808   int i, j, k, nunits, vec_index = 0, scalar_index;
2809   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2810   gimple next_scalar_stmt;
2811   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2812   int first_mask_element;
2813   int index, unroll_factor, current_mask_element, ncopies;
2814   unsigned char *mask;
2815   bool only_one_vec = false, need_next_vector = false;
2816   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2817   int number_of_mask_fixes = 1;
2818   bool mask_fixed = false;
2819   bool needs_first_vector = false;
2820   enum machine_mode mode;
2821
2822   mode = TYPE_MODE (vectype);
2823
2824   if (!can_vec_perm_p (mode, false, NULL))
2825     {
2826       if (dump_enabled_p ())
2827         {
2828           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2829                            "no vect permute for ");
2830           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2831         }
2832       return false;
2833     }
2834
2835   /* The generic VEC_PERM_EXPR code always uses an integral type of the
2836      same size as the vector element being permuted.  */
2837   mask_element_type = lang_hooks.types.type_for_mode
2838                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2839   mask_type = get_vectype_for_scalar_type (mask_element_type);
2840   nunits = TYPE_VECTOR_SUBPARTS (vectype);
2841   mask = XALLOCAVEC (unsigned char, nunits);
2842   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2843
2844   /* The number of vector stmts to generate based only on the
2845      SLP_NODE_INSTANCE unrolling factor.  */
2846   orig_vec_stmts_num = group_size *
2847                 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2848   if (orig_vec_stmts_num == 1)
2849     only_one_vec = true;
2850
2851   /* The number of copies is determined by the final vectorization factor
2852      relative to the SLP_NODE_INSTANCE unrolling factor.  */
2853   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2854
2855   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2856     return false;
2857
2858   /* Generate permutation masks for every NODE. Number of masks for each NODE
2859      is equal to GROUP_SIZE.
2860      E.g., we have a group of three nodes with three loads from the same
2861      location in each node, and the vector size is 4.  I.e., we have an
2862      a0b0c0a1b1c1... sequence and we need to create the following vectors:
2863      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2864      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2865      ...
2866
2867      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2868      The last mask is illegal since we assume two operands for permute
2869      operation, and the mask element values can't be outside that range.
2870      Hence, the last mask must be converted into {2,5,5,5}.
2871      For the first two permutations we need the first and the second input
2872      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2873      we need the second and the third vectors: {b1,c1,a2,b2} and
2874      {c2,a3,b3,c3}.  */
2875
2876     {
2877       scalar_index = 0;
2878       index = 0;
2879       vect_stmts_counter = 0;
2880       vec_index = 0;
2881       first_vec_index = vec_index++;
2882       if (only_one_vec)
2883         second_vec_index = first_vec_index;
2884       else
2885         second_vec_index = vec_index++;
2886
2887       for (j = 0; j < unroll_factor; j++)
2888         {
2889           for (k = 0; k < group_size; k++)
2890             {
2891               i = SLP_TREE_LOAD_PERMUTATION (node)[k];
2892               first_mask_element = i + j * group_size;
2893               if (!vect_get_mask_element (stmt, first_mask_element, 0,
2894                                           nunits, only_one_vec, index,
2895                                           mask, &current_mask_element,
2896                                           &need_next_vector,
2897                                           &number_of_mask_fixes, &mask_fixed,
2898                                           &needs_first_vector))
2899                 return false;
2900               mask[index++] = current_mask_element;
2901
2902               if (index == nunits)
2903                 {
2904                   index = 0;
2905                   if (!can_vec_perm_p (mode, false, mask))
2906                     {
2907                       if (dump_enabled_p ())
2908                         {
2909                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2910                                            vect_location, 
2911                                            "unsupported vect permute { ");
2912                           for (i = 0; i < nunits; ++i)
2913                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2914                                          mask[i]);
2915                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2916                         }
2917                       return false;
2918                     }
2919
2920                   if (!analyze_only)
2921                     {
2922                       int l;
2923                       tree mask_vec, *mask_elts;
2924                       mask_elts = XALLOCAVEC (tree, nunits);
2925                       for (l = 0; l < nunits; ++l)
2926                         mask_elts[l] = build_int_cst (mask_element_type,
2927                                                       mask[l]);
2928                       mask_vec = build_vector (mask_type, mask_elts);
2929
2930                       if (need_next_vector)
2931                         {
2932                           first_vec_index = second_vec_index;
2933                           second_vec_index = vec_index;
2934                         }
2935
2936                       next_scalar_stmt
2937                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2938
2939                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
2940                                mask_vec, first_vec_index, second_vec_index,
2941                                gsi, node, vectype, dr_chain,
2942                                ncopies, vect_stmts_counter++);
2943                     }
2944                 }
2945             }
2946         }
2947     }
2948
2949   return true;
2950 }
2951
2952
2953
2954 /* Vectorize SLP instance tree in postorder.  */
2955
static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree child;

  if (!node)
    return false;

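  /* Postorder: schedule the children first so that their vectorized defs
     are already available when this node's statements are transformed.  */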
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be
     created for the scalar stmts in each node of the SLP tree.  One
     vector iteration covers GROUP_SIZE * VF scalar elements, which
     require (GROUP_SIZE * VF) / NUNITS vector stmts of NUNITS elements
     each.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* The vectorized loads are inserted before the first scalar load of
     the group.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_TREE_LOAD_PERMUTATION (node).exists ())
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* The vectorized stores are inserted just before the last scalar store
     of the group.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
        last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

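  /* Finally generate the vector stmts replacing this node's scalar stmts.  */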
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple stmt, new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
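      /* Replace the call with LHS = 0 and transfer the stmt_vec_info to
         the new assignment so later cleanup still finds it.  */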
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

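  /* First pass: generate the vector code for every instance.  The
     replaced scalar calls and stores are cleaned up in a second pass
     below.  */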
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (loop_vinfo)
        vect_remove_slp_scalar_calls (root);

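      /* Remove the scalar stores of this group; the vector stores emitted
         when scheduling the instance have made them dead.  */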
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");

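  /* Walk the statements of the block; the first statement that belongs
     to an SLP instance triggers scheduling of all instances at once.  */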
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "------>SLPing statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}