1 /* SLP - Basic Block Vectorization
2    Copyright (C) 2007-2013 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-ssa-alias.h"
33 #include "internal-fn.h"
34 #include "gimple-expr.h"
35 #include "is-a.h"
36 #include "gimple.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
43 #include "tree-pass.h"
44 #include "cfgloop.h"
45 #include "expr.h"
46 #include "recog.h"              /* FIXME: for insn_data */
47 #include "optabs.h"
48 #include "tree-vectorizer.h"
49 #include "langhooks.h"
50
51 /* Extract the location of the basic block in the source code.
52    Return the basic block location if found, UNKNOWN_LOCATION otherwise.  */
53
54 source_location
55 find_bb_location (basic_block bb)
56 {
57   gimple stmt = NULL;
58   gimple_stmt_iterator si;
59
60   if (!bb)
61     return UNKNOWN_LOCATION;
62
63   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
64     {
65       stmt = gsi_stmt (si);
66       if (gimple_location (stmt) != UNKNOWN_LOCATION)
67         return gimple_location (stmt);
68     }
69
70   return UNKNOWN_LOCATION;
71 }
72
73
74 /* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
75
76 static void
77 vect_free_slp_tree (slp_tree node)
78 {
79   int i;
80   slp_tree child;
81
82   if (!node)
83     return;
84
85   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
86     vect_free_slp_tree (child);
87
88   SLP_TREE_CHILDREN (node).release ();
89   SLP_TREE_SCALAR_STMTS (node).release ();
90   SLP_TREE_VEC_STMTS (node).release ();
91   SLP_TREE_LOAD_PERMUTATION (node).release ();
92
93   free (node);
94 }
95
96
97 /* Free the memory allocated for the SLP instance.  */
98
99 void
100 vect_free_slp_instance (slp_instance instance)
101 {
102   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
103   SLP_INSTANCE_LOADS (instance).release ();
104   SLP_INSTANCE_BODY_COST_VEC (instance).release ();
105   free (instance);
106 }
107
108
109 /* Create an SLP node for SCALAR_STMTS.  */
110
111 static slp_tree
112 vect_create_new_slp_node (vec<gimple> scalar_stmts)
113 {
114   slp_tree node;
115   gimple stmt = scalar_stmts[0];
116   unsigned int nops;
117
118   if (is_gimple_call (stmt))
119     nops = gimple_call_num_args (stmt);
120   else if (is_gimple_assign (stmt))
121     {
122       nops = gimple_num_ops (stmt) - 1;
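          /* The COND_EXPR condition counts as two operands, since
             vect_get_and_check_slp_defs splits the embedded comparison
             into its two operands.  */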
123       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
124         nops++;
125     }
126   else
127     return NULL;
128
129   node = XNEW (struct _slp_tree);
130   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
131   SLP_TREE_VEC_STMTS (node).create (0);
132   SLP_TREE_CHILDREN (node).create (nops);
133   SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
134
135   return node;
136 }
137
138
139 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
140    operand.  */
141 static vec<slp_oprnd_info> 
142 vect_create_oprnd_info (int nops, int group_size)
143 {
144   int i;
145   slp_oprnd_info oprnd_info;
146   vec<slp_oprnd_info> oprnds_info;
147
148   oprnds_info.create (nops);
149   for (i = 0; i < nops; i++)
150     {
151       oprnd_info = XNEW (struct _slp_oprnd_info);
152       oprnd_info->def_stmts.create (group_size);
153       oprnd_info->first_dt = vect_uninitialized_def;
154       oprnd_info->first_op_type = NULL_TREE;
155       oprnd_info->first_pattern = false;
156       oprnds_info.quick_push (oprnd_info);
157     }
158
159   return oprnds_info;
160 }
161
162
163 /* Free operands info.  */
164
165 static void
166 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
167 {
168   int i;
169   slp_oprnd_info oprnd_info;
170
171   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
172     {
173       oprnd_info->def_stmts.release ();
174       XDELETE (oprnd_info);
175     }
176
177   oprnds_info.release ();
178 }
179
180
181 /* Find the place of the data-ref in STMT in the interleaving chain that starts
182    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
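/* For example, if FIRST_STMT heads the chain S0 -> S1 -> S2, passing S2
   as STMT yields index 2.  */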
183
184 static int
185 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
186 {
187   gimple next_stmt = first_stmt;
188   int result = 0;
189
190   if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
191     return -1;
192
193   do
194     {
195       if (next_stmt == stmt)
196         return result;
197       result++;
198       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
199     }
200   while (next_stmt);
201
202   return -1;
203 }
204
205
206 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
207    they are of a valid type and that they match the defs of the first stmt of
208    the SLP group (stored in OPRNDS_INFO).  */
209
210 static bool
211 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
212                              gimple stmt, bool first,
213                              vec<slp_oprnd_info> *oprnds_info)
214 {
215   tree oprnd;
216   unsigned int i, number_of_oprnds;
217   tree def;
218   gimple def_stmt;
219   enum vect_def_type dt = vect_uninitialized_def;
220   struct loop *loop = NULL;
221   bool pattern = false;
222   slp_oprnd_info oprnd_info;
223   int op_idx = 1;
224   tree compare_rhs = NULL_TREE;
225
226   if (loop_vinfo)
227     loop = LOOP_VINFO_LOOP (loop_vinfo);
228
229   if (is_gimple_call (stmt))
230     {
231       number_of_oprnds = gimple_call_num_args (stmt);
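          /* Call arguments start at gimple_op index 3; slots 0, 1 and 2
             hold the LHS, the called function and the static chain.  */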
232       op_idx = 3;
233     }
234   else if (is_gimple_assign (stmt))
235     {
236       number_of_oprnds = gimple_num_ops (stmt) - 1;
237       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
238         number_of_oprnds++;
239     }
240   else
241     return false;
242
243   for (i = 0; i < number_of_oprnds; i++)
244     {
245       if (compare_rhs)
246         {
247           oprnd = compare_rhs;
248           compare_rhs = NULL_TREE;
249         }
250       else
251         oprnd = gimple_op (stmt, op_idx++);
252
253       oprnd_info = (*oprnds_info)[i];
254
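          /* A COND_EXPR condition arrives as a whole comparison tree;
             process its first operand now and queue the second one for
             the next iteration.  */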
255       if (COMPARISON_CLASS_P (oprnd))
256         {
257           compare_rhs = TREE_OPERAND (oprnd, 1);
258           oprnd = TREE_OPERAND (oprnd, 0);
259         }
260
261       if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
262                                &def, &dt)
263           || (!def_stmt && dt != vect_constant_def))
264         {
265           if (dump_enabled_p ())
266             {
267               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268                                "Build SLP failed: can't find def for ");
269               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
270               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
271             }
272
273           return false;
274         }
275
276       /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
277          from the pattern.  Check that all the stmts of the node are in the
278          pattern.  */
279       if (def_stmt && gimple_bb (def_stmt)
280           && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
281               || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
282                   && gimple_code (def_stmt) != GIMPLE_PHI))
283           && vinfo_for_stmt (def_stmt)
284           && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
285           && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
286           && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
287         {
288           pattern = true;
289           if (!first && !oprnd_info->first_pattern)
290             {
291               if (dump_enabled_p ())
292                 {
293                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
294                                    "Build SLP failed: some of the stmts"
295                                    " are in a pattern, and others are not ");
296                   dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
297                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
298                 }
299
300               return false;
301             }
302
303           def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
304           dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
305
306           if (dt == vect_unknown_def_type)
307             {
308               if (dump_enabled_p ())
309                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
310                                  "Unsupported pattern.\n");
311               return false;
312             }
313
314           switch (gimple_code (def_stmt))
315             {
316               case GIMPLE_PHI:
317                 def = gimple_phi_result (def_stmt);
318                 break;
319
320               case GIMPLE_ASSIGN:
321                 def = gimple_assign_lhs (def_stmt);
322                 break;
323
324               default:
325                 if (dump_enabled_p ())
326                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
327                                    "unsupported defining stmt:\n");
328                 return false;
329             }
330         }
331
332       if (first)
333         {
334           oprnd_info->first_dt = dt;
335           oprnd_info->first_pattern = pattern;
336           oprnd_info->first_op_type = TREE_TYPE (oprnd);
337         }
338       else
339         {
340           /* Not first stmt of the group, check that the def-stmt/s match
341              the def-stmt/s of the first stmt.  Allow different definition
342              types for reduction chains: the first stmt must be a
343              vect_reduction_def (a phi node), and the rest
344              vect_internal_def.  */
345           if (((oprnd_info->first_dt != dt
346                 && !(oprnd_info->first_dt == vect_reduction_def
347                      && dt == vect_internal_def)
348                 && !((oprnd_info->first_dt == vect_external_def
349                       || oprnd_info->first_dt == vect_constant_def)
350                      && (dt == vect_external_def
351                          || dt == vect_constant_def)))
352                || !types_compatible_p (oprnd_info->first_op_type,
353                                        TREE_TYPE (oprnd))))
354             {
355               if (dump_enabled_p ())
356                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
357                                  "Build SLP failed: different types\n");
358
359               return false;
360             }
361         }
362
363       /* Check the types of the definitions.  */
364       switch (dt)
365         {
366         case vect_constant_def:
367         case vect_external_def:
368         case vect_reduction_def:
369           break;
370
371         case vect_internal_def:
372           oprnd_info->def_stmts.quick_push (def_stmt);
373           break;
374
375         default:
376           /* FORNOW: Not supported.  */
377           if (dump_enabled_p ())
378             {
379               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
380                                "Build SLP failed: illegal type of def ");
381               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
382               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
383             }
384
385           return false;
386         }
387     }
388
389   return true;
390 }
391
392
393 /* Verify that the scalar stmts STMTS are isomorphic and of supported
394    types of operation, i.e. that they can form an SLP node.  Return
395    true if so, otherwise return false and indicate in *MATCHES
396    which stmts are not isomorphic to the first one.  If MATCHES[0]
397    is false then this indicates that the comparison could not be
398    carried out or that the stmts will never be vectorized by SLP.  */
399
400 static bool
401 vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
402                        vec<gimple> stmts, unsigned int group_size,
403                        unsigned nops, unsigned int *max_nunits,
404                        unsigned int vectorization_factor, bool *matches)
405 {
406   unsigned int i;
407   gimple stmt = stmts[0];
408   enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
409   enum tree_code first_cond_code = ERROR_MARK;
410   tree lhs;
411   bool need_same_oprnds = false;
412   tree vectype, scalar_type, first_op1 = NULL_TREE;
413   optab optab;
414   int icode;
415   enum machine_mode optab_op2_mode;
416   enum machine_mode vec_mode;
417   struct data_reference *first_dr;
418   HOST_WIDE_INT dummy;
419   gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
420   tree cond;
421
422   /* For every stmt in NODE find its def stmt/s.  */
423   FOR_EACH_VEC_ELT (stmts, i, stmt)
424     {
425       matches[i] = false;
426
427       if (dump_enabled_p ())
428         {
429           dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
430           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
431           dump_printf (MSG_NOTE, "\n");
432         }
433
434       /* Fail to vectorize statements marked as unvectorizable.  */
435       if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
436         {
437           if (dump_enabled_p ())
438             {
439               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
440                                "Build SLP failed: unvectorizable statement ");
441               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
442               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
443             }
444           /* Fatal mismatch.  */
445           matches[0] = false;
446           return false;
447         }
448
449       lhs = gimple_get_lhs (stmt);
450       if (lhs == NULL_TREE)
451         {
452           if (dump_enabled_p ())
453             {
454               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
455                                "Build SLP failed: not GIMPLE_ASSIGN nor "
456                                "GIMPLE_CALL ");
457               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
458               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
459             }
460           /* Fatal mismatch.  */
461           matches[0] = false;
462           return false;
463         }
464
465        if (is_gimple_assign (stmt)
466            && gimple_assign_rhs_code (stmt) == COND_EXPR
467            && (cond = gimple_assign_rhs1 (stmt))
468            && !COMPARISON_CLASS_P (cond))
469         {
470           if (dump_enabled_p ())
471             {
472               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
473                                "Build SLP failed: condition is not "
474                                "comparison ");
475               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
476               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
477             }
478           /* Fatal mismatch.  */
479           matches[0] = false;
480           return false;
481         }
482
483       scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
484       vectype = get_vectype_for_scalar_type (scalar_type);
485       if (!vectype)
486         {
487           if (dump_enabled_p ())
488             {
489               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
490                                "Build SLP failed: unsupported data-type ");
491               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
492                                  scalar_type);
493               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
494             }
495           /* Fatal mismatch.  */
496           matches[0] = false;
497           return false;
498         }
499
500       /* In case of multiple types we need to detect the smallest type.  */
501       if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
502         {
503           *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
504           if (bb_vinfo)
505             vectorization_factor = *max_nunits;
506         }
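          /* A narrower scalar type yields more vector subparts (e.g. V8HI
             vs. V4SI), so *MAX_NUNITS ends up reflecting the smallest
             scalar type in the group.  */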
507
508       if (is_gimple_call (stmt))
509         {
510           rhs_code = CALL_EXPR;
511           if (gimple_call_internal_p (stmt)
512               || gimple_call_tail_p (stmt)
513               || gimple_call_noreturn_p (stmt)
514               || !gimple_call_nothrow_p (stmt)
515               || gimple_call_chain (stmt))
516             {
517               if (dump_enabled_p ())
518                 {
519                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
520                                    "Build SLP failed: unsupported call type ");
521                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
522                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
523                 }
524               /* Fatal mismatch.  */
525               matches[0] = false;
526               return false;
527             }
528         }
529       else
530         rhs_code = gimple_assign_rhs_code (stmt);
531
532       /* Check the operation.  */
533       if (i == 0)
534         {
535           first_stmt_code = rhs_code;
536
537           /* Shift arguments should be equal in all the packed stmts for a
538              vector shift with scalar shift operand.  */
539           if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
540               || rhs_code == LROTATE_EXPR
541               || rhs_code == RROTATE_EXPR)
542             {
543               vec_mode = TYPE_MODE (vectype);
544
545               /* First see if we have a vector/vector shift.  */
546               optab = optab_for_tree_code (rhs_code, vectype,
547                                            optab_vector);
548
549               if (!optab
550                   || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
551                 {
552                   /* No vector/vector shift, try for a vector/scalar shift.  */
553                   optab = optab_for_tree_code (rhs_code, vectype,
554                                                optab_scalar);
555
556                   if (!optab)
557                     {
558                       if (dump_enabled_p ())
559                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
560                                          "Build SLP failed: no optab.\n");
561                       /* Fatal mismatch.  */
562                       matches[0] = false;
563                       return false;
564                     }
565                   icode = (int) optab_handler (optab, vec_mode);
566                   if (icode == CODE_FOR_nothing)
567                     {
568                       if (dump_enabled_p ())
569                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
570                                          "Build SLP failed: "
571                                          "op not supported by target.\n");
572                       /* Fatal mismatch.  */
573                       matches[0] = false;
574                       return false;
575                     }
576                   optab_op2_mode = insn_data[icode].operand[2].mode;
577                   if (!VECTOR_MODE_P (optab_op2_mode))
578                     {
579                       need_same_oprnds = true;
580                       first_op1 = gimple_assign_rhs2 (stmt);
581                     }
582                 }
583             }
584           else if (rhs_code == WIDEN_LSHIFT_EXPR)
585             {
586               need_same_oprnds = true;
587               first_op1 = gimple_assign_rhs2 (stmt);
588             }
589         }
590       else
591         {
592           if (first_stmt_code != rhs_code
593               && (first_stmt_code != IMAGPART_EXPR
594                   || rhs_code != REALPART_EXPR)
595               && (first_stmt_code != REALPART_EXPR
596                   || rhs_code != IMAGPART_EXPR)
597               && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
598                    && (first_stmt_code == ARRAY_REF
599                        || first_stmt_code == BIT_FIELD_REF
600                        || first_stmt_code == INDIRECT_REF
601                        || first_stmt_code == COMPONENT_REF
602                        || first_stmt_code == MEM_REF)))
603             {
604               if (dump_enabled_p ())
605                 {
606                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
607                                    "Build SLP failed: different operation "
608                                    "in stmt ");
609                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
610                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
611                 }
612               /* Mismatch.  */
613               continue;
614             }
615
616           if (need_same_oprnds
617               && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
618             {
619               if (dump_enabled_p ())
620                 {
621                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
622                                    "Build SLP failed: different shift "
623                                    "arguments in ");
624                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
625                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
626                 }
627               /* Mismatch.  */
628               continue;
629             }
630
631           if (rhs_code == CALL_EXPR)
632             {
633               gimple first_stmt = stmts[0];
634               if (gimple_call_num_args (stmt) != nops
635                   || !operand_equal_p (gimple_call_fn (first_stmt),
636                                        gimple_call_fn (stmt), 0)
637                   || gimple_call_fntype (first_stmt)
638                      != gimple_call_fntype (stmt))
639                 {
640                   if (dump_enabled_p ())
641                     {
642                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
643                                        "Build SLP failed: different calls in ");
644                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
645                                         stmt, 0);
646                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
647                     }
648                   /* Mismatch.  */
649                   continue;
650                 }
651             }
652         }
653
654       /* Grouped store or load.  */
655       if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
656         {
657           if (REFERENCE_CLASS_P (lhs))
658             {
659               /* Store.  */
660               ;
661             }
662           else
663             {
664               /* Load.  */
665               unsigned unrolling_factor
666                 = least_common_multiple
667                     (*max_nunits, group_size) / group_size;
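                  /* E.g. *MAX_NUNITS 4 with GROUP_SIZE 2 gives an
                     unrolling factor of 2.  */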
668               /* FORNOW: Check that there is no gap between the loads
669                  and no gap between the groups when we need to load
670                  multiple groups at once.
671                  ???  We should enhance this to only disallow gaps
672                  inside vectors.  */
673               if ((unrolling_factor > 1
674                    && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
675                    && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
676                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
677                       && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
678                 {
679                   if (dump_enabled_p ())
680                     {
681                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
682                                        "Build SLP failed: grouped "
683                                        "loads have gaps ");
684                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
685                                         stmt, 0);
686                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
687                     }
688                   /* Fatal mismatch.  */
689                   matches[0] = false;
690                   return false;
691                 }
692
693               /* Check that the size of interleaved loads group is not
694                  greater than the SLP group size.  */
695               unsigned ncopies
696                 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
697               if (loop_vinfo
698                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
699                   && ((GROUP_SIZE (vinfo_for_stmt (stmt))
700                        - GROUP_GAP (vinfo_for_stmt (stmt)))
701                       > ncopies * group_size))
702                 {
703                   if (dump_enabled_p ())
704                     {
705                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
706                                        "Build SLP failed: the number "
707                                        "of interleaved loads is greater than "
708                                        "the SLP group size ");
709                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
710                                         stmt, 0);
711                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
712                     }
713                   /* Fatal mismatch.  */
714                   matches[0] = false;
715                   return false;
716                 }
717
718               old_first_load = first_load;
719               first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
720               if (prev_first_load)
721                 {
722                   /* Check that there are no loads from different interleaving
723                      chains in the same node.  */
724                   if (prev_first_load != first_load)
725                     {
726                       if (dump_enabled_p ())
727                         {
728                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
729                                            vect_location, 
730                                            "Build SLP failed: different "
731                                            "interleaving chains in one node ");
732                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
733                                             stmt, 0);
734                           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
735                         }
736                       /* Mismatch.  */
737                       continue;
738                     }
739                 }
740               else
741                 prev_first_load = first_load;
742
743               /* In some cases a group of loads is just the same load
744                  repeated N times.  Only analyze its cost once.  */
745               if (first_load == stmt && old_first_load != first_load)
746                 {
747                   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
748                   if (vect_supportable_dr_alignment (first_dr, false)
749                       == dr_unaligned_unsupported)
750                     {
751                       if (dump_enabled_p ())
752                         {
753                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
754                                            vect_location, 
755                                            "Build SLP failed: unsupported "
756                                            "unaligned load ");
757                           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
758                                             stmt, 0);
759                           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
760                         }
761                       /* Fatal mismatch.  */
762                       matches[0] = false;
763                       return false;
764                     }
765                 }
766            }
767         } /* Grouped access.  */
768       else
769         {
770           if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
771             {
772               /* Not grouped load.  */
773               if (dump_enabled_p ())
774                 {
775                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
776                                    "Build SLP failed: not grouped load ");
777                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
778                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
779                 }
780
781               /* FORNOW: Not grouped loads are not supported.  */
782               /* Fatal mismatch.  */
783               matches[0] = false;
784               return false;
785             }
786
787           /* Not memory operation.  */
788           if (TREE_CODE_CLASS (rhs_code) != tcc_binary
789               && TREE_CODE_CLASS (rhs_code) != tcc_unary
790               && rhs_code != COND_EXPR
791               && rhs_code != CALL_EXPR)
792             {
793               if (dump_enabled_p ())
794                 {
795                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
796                                    "Build SLP failed: operation");
797                   dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
798                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
799                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
800                 }
801               /* Fatal mismatch.  */
802               matches[0] = false;
803               return false;
804             }
805
806           if (rhs_code == COND_EXPR)
807             {
808               tree cond_expr = gimple_assign_rhs1 (stmt);
809
810               if (i == 0)
811                 first_cond_code = TREE_CODE (cond_expr);
812               else if (first_cond_code != TREE_CODE (cond_expr))
813                 {
814                   if (dump_enabled_p ())
815                     {
816                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
817                                        "Build SLP failed: different"
818                                        " operation");
819                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
820                                         stmt, 0);
821                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
822                     }
823                   /* Mismatch.  */
824                   continue;
825                 }
826             }
827         }
828
829       matches[i] = true;
830     }
831
832   for (i = 0; i < group_size; ++i)
833     if (!matches[i])
834       return false;
835
836   return true;
837 }
838
839 /* Recursively build an SLP tree starting from NODE.
840    Fail (and return false) if def-stmts are not isomorphic, require
841    data permutation or are of unsupported types of operation.
842    Otherwise, return true.
843    Mismatching stmts are recorded in MATCHES, a fatal mismatch being
844    signalled by MATCHES[0] set to false.  */
845
846 static bool
847 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
848                      slp_tree *node, unsigned int group_size,
849                      unsigned int *max_nunits,
850                      vec<slp_tree> *loads,
851                      unsigned int vectorization_factor,
852                      bool *matches, unsigned *npermutes)
853 {
854   unsigned nops, i, this_npermutes = 0;
855   gimple stmt;
856
857   if (!matches)
858     matches = XALLOCAVEC (bool, group_size);
859   if (!npermutes)
860     npermutes = &this_npermutes;
861
862   matches[0] = false;
863
864   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
865   if (is_gimple_call (stmt))
866     nops = gimple_call_num_args (stmt);
867   else if (is_gimple_assign (stmt))
868     {
869       nops = gimple_num_ops (stmt) - 1;
870       if (gimple_assign_rhs_code (stmt) == COND_EXPR)
871         nops++;
872     }
873   else
874     return false;
875
876   if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
877                               SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
878                               max_nunits, vectorization_factor, matches))
879     return false;
880
881   /* If the SLP node is a load, terminate the recursion.  */
882   if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
883       && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
884     {
885       loads->safe_push (*node);
886       return true;
887     }
888
889   /* Get at the operands, verifying they are compatible.  */
890   vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
891   slp_oprnd_info oprnd_info;
892   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
893     {
894       if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
895                                         stmt, (i == 0), &oprnds_info))
896         {
897           vect_free_oprnd_info (oprnds_info);
898           return false;
899         }
900     }
901
902   stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
903
904   /* Create SLP_TREE nodes for the definition node/s.  */
905   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
906     {
907       slp_tree child;
908       unsigned old_nloads = loads->length ();
909       unsigned old_max_nunits = *max_nunits;
910
911       if (oprnd_info->first_dt != vect_internal_def)
912         continue;
913
914       child = vect_create_new_slp_node (oprnd_info->def_stmts);
915       if (!child)
916         {
917           vect_free_oprnd_info (oprnds_info);
918           return false;
919         }
920
921       bool *matches = XALLOCAVEC (bool, group_size);
922       if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
923                                group_size, max_nunits, loads,
924                                vectorization_factor, matches, npermutes))
925         {
926           oprnd_info->def_stmts = vNULL;
927           SLP_TREE_CHILDREN (*node).quick_push (child);
928           continue;
929         }
930
931       /* If the SLP build for operand zero failed and operand zero
932          and one can be commutated try that for the scalar stmts
933          that failed the match.  */
934       if (i == 0
935           /* A first scalar stmt mismatch signals a fatal mismatch.  */
936           && matches[0]
937           /* ???  For COND_EXPRs we can swap the comparison operands
938              as well as the arms under some constraints.  */
939           && nops == 2
940           && oprnds_info[1]->first_dt == vect_internal_def
941           && is_gimple_assign (stmt)
942           && commutative_tree_code (gimple_assign_rhs_code (stmt))
943           /* Do so only if the number of unsuccessful permutes was not more
944              than a cut-off, as re-trying the recursive match on
945              possibly each level of the tree would expose exponential
946              behavior.  */
947           && *npermutes < 4)
948         {
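              /* For example, with { a0 = x0 + y0, a1 = y1 + x1 } the defs
                 of the mismatched second stmt are swapped below so that
                 both stmts match operand-wise.  */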
949           /* Roll back.  */
950           *max_nunits = old_max_nunits;
951           loads->truncate (old_nloads);
952           /* Swap mismatched definition stmts.  */
953           for (unsigned j = 0; j < group_size; ++j)
954             if (!matches[j])
955               {
956                 gimple tem = oprnds_info[0]->def_stmts[j];
957                 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
958                 oprnds_info[1]->def_stmts[j] = tem;
959               }
960           /* And try again ... */
961           if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
962                                    group_size, max_nunits, loads,
963                                    vectorization_factor,
964                                    matches, npermutes))
965             {
966               oprnd_info->def_stmts = vNULL;
967               SLP_TREE_CHILDREN (*node).quick_push (child);
968               continue;
969             }
970
971           ++*npermutes;
972         }
973
974       oprnd_info->def_stmts = vNULL;
975       vect_free_slp_tree (child);
976       vect_free_oprnd_info (oprnds_info);
977       return false;
978     }
979
980   vect_free_oprnd_info (oprnds_info);
981   return true;
982 }
983
984 /* Dump a slp tree NODE using flags specified in DUMP_KIND.  */
985
986 static void
987 vect_print_slp_tree (int dump_kind, slp_tree node)
988 {
989   int i;
990   gimple stmt;
991   slp_tree child;
992
993   if (!node)
994     return;
995
996   dump_printf (dump_kind, "node ");
997   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
998     {
999       dump_printf (dump_kind, "\n\tstmt %d ", i);
1000       dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1001     }
1002   dump_printf (dump_kind, "\n");
1003
1004   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1005     vect_print_slp_tree (dump_kind, child);
1006 }
1007
1008
1009 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1010    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1011    J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1012    stmts in NODE are to be marked.  */
1013
1014 static void
1015 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1016 {
1017   int i;
1018   gimple stmt;
1019   slp_tree child;
1020
1021   if (!node)
1022     return;
1023
1024   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1025     if (j < 0 || i == j)
1026       STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1027
1028   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1029     vect_mark_slp_stmts (child, mark, j);
1030 }
1031
1032
1033 /* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
1034
1035 static void
1036 vect_mark_slp_stmts_relevant (slp_tree node)
1037 {
1038   int i;
1039   gimple stmt;
1040   stmt_vec_info stmt_info;
1041   slp_tree child;
1042
1043   if (!node)
1044     return;
1045
1046   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1047     {
1048       stmt_info = vinfo_for_stmt (stmt);
1049       gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1050                   || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1051       STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1052     }
1053
1054   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1055     vect_mark_slp_stmts_relevant (child);
1056 }
1057
1058
1059 /* Rearrange the statements of NODE according to PERMUTATION.  */
1060
1061 static void
1062 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1063                           vec<unsigned> permutation)
1064 {
1065   gimple stmt;
1066   vec<gimple> tmp_stmts;
1067   unsigned int i;
1068   slp_tree child;
1069
1070   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1071     vect_slp_rearrange_stmts (child, group_size, permutation);
1072
1073   gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1074   tmp_stmts.create (group_size);
1075   tmp_stmts.quick_grow_cleared (group_size);
1076
1077   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1078     tmp_stmts[permutation[i]] = stmt;
1079
1080   SLP_TREE_SCALAR_STMTS (node).release ();
1081   SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1082 }
1083
1084
1085 /* Check if the required load permutations in the SLP instance
1086    SLP_INSTN are supported.  */
1087
1088 static bool
1089 vect_supported_load_permutation_p (slp_instance slp_instn)
1090 {
1091   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1092   unsigned int i, j, k, next;
1093   sbitmap load_index;
1094   slp_tree node;
1095   gimple stmt, load, next_load, first_load;
1096   struct data_reference *dr;
1097
1098   if (dump_enabled_p ())
1099     {
1100       dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1101       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1102         if (node->load_permutation.exists ())
1103           FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1104             dump_printf (MSG_NOTE, "%d ", next);
1105         else
1106           for (i = 0; i < group_size; ++i)
1107             dump_printf (MSG_NOTE, "%d ", i);
1108       dump_printf (MSG_NOTE, "\n");
1109     }
1110
1111   /* In case of reduction every load permutation is allowed, since the order
1112      of the reduction statements is not important (as opposed to the case of
1113      grouped stores).  The only condition we need to check is that all the
1114      load nodes are of the same size and have the same permutation (and then
1115      rearrange all the nodes of the SLP instance according to this 
1116      permutation).  */
1117
1118   /* Check that all the load nodes are of the same size.  */
1119   /* ???  Can't we assert this? */
1120   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1121     if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1122       return false;
1123
1124   node = SLP_INSTANCE_TREE (slp_instn);
1125   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1126
1127   /* Reduction (there are no data-refs in the root).
1128      In reduction chain the order of the loads is important.  */
1129   if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1130       && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1131     {
1132       slp_tree load;
1133       unsigned int lidx;
1134
1135       /* Compare all the permutation sequences to the first one.  We know
1136          that at least one load is permuted.  */
1137       node = SLP_INSTANCE_LOADS (slp_instn)[0];
1138       if (!node->load_permutation.exists ())
1139         return false;
1140       for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1141         {
1142           if (!load->load_permutation.exists ())
1143             return false;
1144           FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1145             if (lidx != node->load_permutation[j])
1146               return false;
1147         }
1148
1149       /* Check that the loads in the first sequence are different and there
1150          are no gaps between them.  */
1151       load_index = sbitmap_alloc (group_size);
1152       bitmap_clear (load_index);
1153       FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1154         {
1155           if (bitmap_bit_p (load_index, lidx))
1156             {
1157               sbitmap_free (load_index);
1158               return false;
1159             }
1160           bitmap_set_bit (load_index, lidx);
1161         }
1162       for (i = 0; i < group_size; i++)
1163         if (!bitmap_bit_p (load_index, i))
1164           {
1165             sbitmap_free (load_index);
1166             return false;
1167           }
1168       sbitmap_free (load_index);
1169
1170       /* This permutation is valid for reduction.  Since the order of the
1171          statements in the nodes is not important unless they are memory
1172          accesses, we can rearrange the statements in all the nodes
1173          according to the order of the loads.  */
1174       vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1175                                 node->load_permutation);
1176
1177       /* We are done, no actual permutations need to be generated.  */
1178       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1179         SLP_TREE_LOAD_PERMUTATION (node).release ();
1180       return true;
1181     }
1182
1183   /* In basic block vectorization we allow any subchain of an interleaving
1184      chain.
1185      FORNOW: not supported in loop SLP because of realignment complications.  */
1186   if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1187     {
1188       /* Check that for every node in the instance the loads
1189          form a subchain.  */
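           /* I.e. the loads in each node must be consecutive elements of
              one interleaving chain, e.g. a[i+2] directly followed by
              a[i+3].  */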
1190       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1191         {
1192           next_load = NULL;
1193           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1194             {
1195               if (j != 0 && next_load != load)
1196                 return false;
1197               next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1198             }
1199         }
1200
1201       /* Check that the alignment of the first load in every subchain, i.e.,
1202          the first statement in every load node, is supported.
1203          ???  This belongs in alignment checking.  */
1204       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1205         {
1206           first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1207           if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1208             {
1209               dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1210               if (vect_supportable_dr_alignment (dr, false)
1211                   == dr_unaligned_unsupported)
1212                 {
1213                   if (dump_enabled_p ())
1214                     {
1215                       dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1216                                        vect_location,
1217                                        "unsupported unaligned load ");
1218                       dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1219                                         first_load, 0);
1220                       dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1221                     }
1222                   return false;
1223                 }
1224             }
1225         }
1226
1227       /* We are done, no actual permutations need to be generated.  */
1228       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1229         SLP_TREE_LOAD_PERMUTATION (node).release ();
1230       return true;
1231     }
1232
1233   /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1234      GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
1235      well (unless it's reduction).  */
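     /* E.g. with GROUP_SIZE 2 only two load nodes with permutations
        { 0, 0 } and { 1, 1 } pass the checks below.  */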
1236   if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
1237     return false;
1238   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1239     if (!node->load_permutation.exists ())
1240       return false;
1241
1242   load_index = sbitmap_alloc (group_size);
1243   bitmap_clear (load_index);
1244   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1245     {
1246       unsigned int lidx = node->load_permutation[0];
1247       if (bitmap_bit_p (load_index, lidx))
1248         {
1249           sbitmap_free (load_index);
1250           return false;
1251         }
1252       bitmap_set_bit (load_index, lidx);
1253       FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1254         if (k != lidx)
1255           {
1256             sbitmap_free (load_index);
1257             return false;
1258           }
1259     }
1260   for (i = 0; i < group_size; i++)
1261     if (!bitmap_bit_p (load_index, i))
1262       {
1263         sbitmap_free (load_index);
1264         return false;
1265       }
1266   sbitmap_free (load_index);
1267
1268   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1269     if (node->load_permutation.exists ()
1270         && !vect_transform_slp_perm_load
1271               (node, vNULL, NULL,
1272                SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1273       return false;
1274   return true;
1275 }
1276
1277
1278 /* Find the first load in the loop that belongs to INSTANCE.
1279    When loads are in several SLP nodes, there can be a case in which the first
1280    load does not appear in the first SLP node to be transformed, causing
1281    incorrect order of statements.  Since we generate all the loads together,
1282    they must be inserted before the first load of the SLP instance and not
1283    before the first load of the first node of the instance.  */
1284
1285 static gimple
1286 vect_find_first_load_in_slp_instance (slp_instance instance)
1287 {
1288   int i, j;
1289   slp_tree load_node;
1290   gimple first_load = NULL, load;
1291
1292   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1293     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1294       first_load = get_earlier_stmt (load, first_load);
1295
1296   return first_load;
1297 }
1298
1299
1300 /* Find the last store in SLP INSTANCE.  */
1301
1302 static gimple
1303 vect_find_last_store_in_slp_instance (slp_instance instance)
1304 {
1305   int i;
1306   slp_tree node;
1307   gimple last_store = NULL, store;
1308
1309   node = SLP_INSTANCE_TREE (instance);
1310   for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1311     last_store = get_later_stmt (store, last_store);
1312
1313   return last_store;
1314 }
1315
1316 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1317
1318 static void
1319 vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1320                          slp_instance instance, slp_tree node,
1321                          stmt_vector_for_cost *prologue_cost_vec,
1322                          unsigned ncopies_for_cost)
1323 {
1324   stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1325
1326   unsigned i;
1327   slp_tree child;
1328   gimple stmt, s;
1329   stmt_vec_info stmt_info;
1330   tree lhs;
1331   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1332
1333   /* Recurse down the SLP tree.  */
1334   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1335     vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1336                              instance, child, prologue_cost_vec,
1337                              ncopies_for_cost);
1338
1339   /* Look at the first scalar stmt to determine the cost.  */
1340   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1341   stmt_info = vinfo_for_stmt (stmt);
1342   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1343     {
1344       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1345         vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1346                                vect_uninitialized_def,
1347                                node, prologue_cost_vec, body_cost_vec);
1348       else
1349         {
1350           int i;
1351           gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1352           vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1353                                 node, prologue_cost_vec, body_cost_vec);
1354           /* If the load is permuted record the cost for the permutation.
1355              ???  Loads from multiple chains are let through here only
1356              for a single special case involving complex numbers where
1357              in the end no permutation is necessary.  */
1358           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1359             if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1360                  == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1361                 && vect_get_place_in_interleaving_chain
1362                      (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1363               {
1364                 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1365                                   stmt_info, 0, vect_body);
1366                 break;
1367               }
1368         }
1369     }
1370   else
1371     record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1372                       stmt_info, 0, vect_body);
1373
1374   /* Scan operands and account for prologue cost of constants/externals.
1375      ???  This over-estimates cost for multiple uses and should be
1376      re-engineered.  */
1377   lhs = gimple_get_lhs (stmt);
1378   for (i = 0; i < gimple_num_ops (stmt); ++i)
1379     {
1380       tree def, op = gimple_op (stmt, i);
1381       gimple def_stmt;
1382       enum vect_def_type dt;
1383       if (!op || op == lhs)
1384         continue;
1385       if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1386                               &def_stmt, &def, &dt)
1387           && (dt == vect_constant_def || dt == vect_external_def))
1388         record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1389                           stmt_info, 0, vect_prologue);
1390     }
1391 }
1392
1393 /* Compute the cost for the SLP instance INSTANCE.  */
1394
1395 static void
1396 vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1397                        slp_instance instance, unsigned nunits)
1398 {
1399   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1400   unsigned ncopies_for_cost;
1401   stmt_info_for_cost *si;
1402   unsigned i;
1403
1404   /* Calculate the number of vector stmts to create based on the unrolling
1405      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1406      GROUP_SIZE / NUNITS otherwise).  */
1407   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1408   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
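     /* E.g. NUNITS 4 with GROUP_SIZE 6 gives least_common_multiple 12
        and thus NCOPIES_FOR_COST 3.  */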
1409
1410   prologue_cost_vec.create (10);
1411   body_cost_vec.create (10);
1412   SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1413   vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1414                            instance, SLP_INSTANCE_TREE (instance),
1415                            &prologue_cost_vec, ncopies_for_cost);
1416
1417   /* Record the prologue costs, which were delayed until we were
1418      sure that SLP was successful.  Unlike the body costs, we know
1419      the final values now regardless of the loop vectorization factor.  */
1420   void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1421                 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1422   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1423     {
1424       struct _stmt_vec_info *stmt_info
1425         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1426       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1427                             si->misalign, vect_prologue);
1428     }
1429
1430   prologue_cost_vec.release ();
1431 }
1432
1433 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1434    vect_build_slp_tree to build a tree of packed stmts if possible.
1435    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1436
1437 static bool
1438 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1439                            gimple stmt)
1440 {
1441   slp_instance new_instance;
1442   slp_tree node;
1443   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1444   unsigned int unrolling_factor = 1, nunits;
1445   tree vectype, scalar_type = NULL_TREE;
1446   gimple next;
1447   unsigned int vectorization_factor = 0;
1448   int i;
1449   unsigned int max_nunits = 0;
1450   vec<slp_tree> loads;
1451   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1452   vec<gimple> scalar_stmts;
1453
1454   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1455     {
1456       if (dr)
1457         {
1458           scalar_type = TREE_TYPE (DR_REF (dr));
1459           vectype = get_vectype_for_scalar_type (scalar_type);
1460         }
1461       else
1462         {
1463           gcc_assert (loop_vinfo);
1464           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1465         }
1466
1467       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1468     }
1469   else
1470     {
1471       gcc_assert (loop_vinfo);
1472       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1473       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1474     }
1475
1476   if (!vectype)
1477     {
1478       if (dump_enabled_p ())
1479         {
1480           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1481                            "Build SLP failed: unsupported data-type ");
1482           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1483           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1484         }
1485
1486       return false;
1487     }
1488
1489   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1490   if (loop_vinfo)
1491     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1492   else
1493     vectorization_factor = nunits;
1494
1495   /* Calculate the unrolling factor.  */
1496   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
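  /* A minimal illustration with assumed values: NUNITS == 4 and
     GROUP_SIZE == 2 give least_common_multiple (4, 2) / 2 == 2, i.e. two
     copies of the group are needed to fill one vector, while
     GROUP_SIZE == 4 gives an unrolling factor of 1.  */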
1497   if (unrolling_factor != 1 && !loop_vinfo)
1498     {
1499       if (dump_enabled_p ())
1500         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1501                          "Build SLP failed: unrolling required in basic"
1502                          " block SLP\n");
1503
1504       return false;
1505     }
1506
1507   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1508   scalar_stmts.create (group_size);
1509   next = stmt;
1510   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1511     {
1512       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1513       while (next)
1514         {
1515           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1516               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1517             scalar_stmts.safe_push (
1518                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1519           else
1520             scalar_stmts.safe_push (next);
1521           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1522         }
1523     }
1524   else
1525     {
1526       /* Collect reduction statements.  */
1527       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1528       for (i = 0; reductions.iterate (i, &next); i++)
1529         scalar_stmts.safe_push (next);
1530     }
1531
1532   node = vect_create_new_slp_node (scalar_stmts);
1533
1534   loads.create (group_size);
1535
1536   /* Build the tree for the SLP instance.  */
1537   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1538                            &max_nunits, &loads,
1539                            vectorization_factor, NULL, NULL))
1540     {
1541       /* Calculate the unrolling factor based on the smallest type.  */
1542       if (max_nunits > nunits)
1543         unrolling_factor = least_common_multiple (max_nunits, group_size)
1544                            / group_size;
1545
1546       if (unrolling_factor != 1 && !loop_vinfo)
1547         {
1548           if (dump_enabled_p ())
1549             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1550                              "Build SLP failed: unrolling required in basic"
1551                              " block SLP\n");
1552           vect_free_slp_tree (node);
1553           loads.release ();
1554           return false;
1555         }
1556
1557       /* Create a new SLP instance.  */
1558       new_instance = XNEW (struct _slp_instance);
1559       SLP_INSTANCE_TREE (new_instance) = node;
1560       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1561       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1562       SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1563       SLP_INSTANCE_LOADS (new_instance) = loads;
1564       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1565
1566       /* Compute the load permutation.  */
1567       slp_tree load_node;
1568       bool loads_permuted = false;
1569       FOR_EACH_VEC_ELT (loads, i, load_node)
1570         {
1571           vec<unsigned> load_permutation;
1572           int j;
1573           gimple load, first_stmt;
1574           bool this_load_permuted = false;
1575           load_permutation.create (group_size);
1576           first_stmt = GROUP_FIRST_ELEMENT
1577               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1578           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1579             {
1580               int load_place
1581                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1582               gcc_assert (load_place != -1);
1583               if (load_place != j)
1584                 this_load_permuted = true;
1585               load_permutation.safe_push (load_place);
1586             }
1587           if (!this_load_permuted)
1588             {
1589               load_permutation.release ();
1590               continue;
1591             }
1592           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1593           loads_permuted = true;
1594         }
1595
1596       if (loads_permuted)
1597         {
1598           if (!vect_supported_load_permutation_p (new_instance))
1599             {
1600               if (dump_enabled_p ())
1601                 {
1602                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1603                                    "Build SLP failed: unsupported load "
1604                                    "permutation ");
1605                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1606                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1607                 }
1608               vect_free_slp_instance (new_instance);
1609               return false;
1610             }
1611
1612           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1613             = vect_find_first_load_in_slp_instance (new_instance);
1614         }
1615
1616       /* Compute the costs of this SLP instance.  */
1617       vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1618                              new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1619
1620       if (loop_vinfo)
1621         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1622       else
1623         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1624
1625       if (dump_enabled_p ())
1626         vect_print_slp_tree (MSG_NOTE, node);
1627
1628       return true;
1629     }
1630
1631   /* Failed to SLP.  */
1632   /* Free the allocated memory.  */
1633   vect_free_slp_tree (node);
1634   loads.release ();
1635
1636   return false;
1637 }
1638
1639
1640 /* Check if there are stmts in the loop that can be vectorized using SLP.
1641    Build SLP trees of packed scalar stmts if SLP is possible.  */
1642
1643 bool
1644 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1645 {
1646   unsigned int i;
1647   vec<gimple> grouped_stores;
1648   vec<gimple> reductions = vNULL;
1649   vec<gimple> reduc_chains = vNULL;
1650   gimple first_element;
1651   bool ok = false;
1652
1653   if (dump_enabled_p ())
1654     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1655
1656   if (loop_vinfo)
1657     {
1658       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1659       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1660       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1661     }
1662   else
1663     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1664
1665   /* Find SLP sequences starting from groups of grouped stores.  */
1666   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1667     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1668       ok = true;
1669
1670   if (bb_vinfo && !ok)
1671     {
1672       if (dump_enabled_p ())
1673         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1674                          "Failed to SLP the basic block.\n");
1675
1676       return false;
1677     }
1678
1679   if (loop_vinfo
1680       && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1681     {
1682       /* Find SLP sequences starting from reduction chains.  */
1683       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1684         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1685           ok = true;
1686         else
1687           return false;
1688
1689       /* Don't try to vectorize SLP reductions if a reduction chain was
1690          detected.  */
1691       return ok;
1692     }
1693
1694   /* Find SLP sequences starting from groups of reductions.  */
1695   if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1696       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1697     ok = true;
1698
1699   return true;  /* SLP failure is not fatal when vectorizing a loop.  */
1700 }
1701
1702
1703 /* For each possible SLP instance decide whether to SLP it and calculate the
1704    overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
1705    to SLP at least one instance.  */
1706
1707 bool
1708 vect_make_slp_decision (loop_vec_info loop_vinfo)
1709 {
1710   unsigned int i, unrolling_factor = 1;
1711   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1712   slp_instance instance;
1713   int decided_to_slp = 0;
1714
1715   if (dump_enabled_p ())
1716     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1717                      "\n");
1718
1719   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1720     {
1721       /* FORNOW: SLP if you can.  */
1722       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1723         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1724
1725       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1726          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1727          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1728       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1729       decided_to_slp++;
1730     }
1731
1732   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1733
1734   if (decided_to_slp && dump_enabled_p ())
1735     dump_printf_loc (MSG_NOTE, vect_location,
1736                      "Decided to SLP %d instances. Unrolling factor %d\n",
1737                      decided_to_slp, unrolling_factor);
1738
1739   return (decided_to_slp > 0);
1740 }
1741
1742
1743 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1744    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
1745
1746 static void
1747 vect_detect_hybrid_slp_stmts (slp_tree node)
1748 {
1749   int i;
1750   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1751   gimple stmt = stmts[0];
1752   imm_use_iterator imm_iter;
1753   gimple use_stmt;
1754   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1755   slp_tree child;
1756   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1757   struct loop *loop = NULL;
1758   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1759   basic_block bb = NULL;
1760
1761   if (!node)
1762     return;
1763
1764   if (loop_vinfo)
1765     loop = LOOP_VINFO_LOOP (loop_vinfo);
1766   else
1767     bb = BB_VINFO_BB (bb_vinfo);
1768
1769   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1770     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1771         && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1772       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1773         if (gimple_bb (use_stmt)
1774             && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1775                  || bb == gimple_bb (use_stmt))
1776             && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1777             && !STMT_SLP_TYPE (stmt_vinfo)
1778             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1779                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1780             && !(gimple_code (use_stmt) == GIMPLE_PHI
1781                  && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1782                   == vect_reduction_def))
1783           vect_mark_slp_stmts (node, hybrid, i);
1784
1785   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1786     vect_detect_hybrid_slp_stmts (child);
1787 }
1788
1789
1790 /* Find stmts that must be both vectorized and SLPed.  */
1791
1792 void
1793 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1794 {
1795   unsigned int i;
1796   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1797   slp_instance instance;
1798
1799   if (dump_enabled_p ())
1800     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1801                      "\n");
1802
1803   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1804     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1805 }
1806
1807
1808 /* Create and initialize a new bb_vec_info struct for BB, as well as
1809    stmt_vec_info structs for all the stmts in it.  */
1810
1811 static bb_vec_info
1812 new_bb_vec_info (basic_block bb)
1813 {
1814   bb_vec_info res = NULL;
1815   gimple_stmt_iterator gsi;
1816
1817   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1818   BB_VINFO_BB (res) = bb;
1819
1820   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1821     {
1822       gimple stmt = gsi_stmt (gsi);
1823       gimple_set_uid (stmt, 0);
1824       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1825     }
1826
1827   BB_VINFO_GROUPED_STORES (res).create (10);
1828   BB_VINFO_SLP_INSTANCES (res).create (2);
1829   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1830
1831   bb->aux = res;
1832   return res;
1833 }
1834
1835
1836 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1837    stmts in the basic block.  */
1838
1839 static void
1840 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1841 {
1842   vec<slp_instance> slp_instances;
1843   slp_instance instance;
1844   basic_block bb;
1845   gimple_stmt_iterator si;
1846   unsigned i;
1847
1848   if (!bb_vinfo)
1849     return;
1850
1851   bb = BB_VINFO_BB (bb_vinfo);
1852
1853   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1854     {
1855       gimple stmt = gsi_stmt (si);
1856       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1857
1858       if (stmt_info)
1859         /* Free stmt_vec_info.  */
1860         free_stmt_vec_info (stmt);
1861     }
1862
1863   vect_destroy_datarefs (NULL, bb_vinfo);
1864   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1865   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1866   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1867   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1868     vect_free_slp_instance (instance);
1869   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1870   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1871   free (bb_vinfo);
1872   bb->aux = NULL;
1873 }
1874
1875
1876 /* Analyze statements contained in SLP tree node after recursively analyzing
1877    the subtree.  Return TRUE if the operations are supported.  */
1878
1879 static bool
1880 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1881 {
1882   bool dummy;
1883   int i;
1884   gimple stmt;
1885   slp_tree child;
1886
1887   if (!node)
1888     return true;
1889
1890   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1891     if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1892       return false;
1893
1894   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1895     {
1896       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1897       gcc_assert (stmt_info);
1898       gcc_assert (PURE_SLP_STMT (stmt_info));
1899
1900       if (!vect_analyze_stmt (stmt, &dummy, node))
1901         return false;
1902     }
1903
1904   return true;
1905 }
1906
1907
1908 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
1909    operations are supported.  */
1910
1911 static bool
1912 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1913 {
1914   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1915   slp_instance instance;
1916   int i;
1917
1918   for (i = 0; slp_instances.iterate (i, &instance); )
1919     {
1920       if (!vect_slp_analyze_node_operations (bb_vinfo,
1921                                              SLP_INSTANCE_TREE (instance)))
1922         {
1923           vect_free_slp_instance (instance);
1924           slp_instances.ordered_remove (i);
1925         }
1926       else
1927         i++;
1928     }
1929
1930   if (!slp_instances.length ())
1931     return false;
1932
1933   return true;
1934 }
1935
1936
1937 /* Compute the scalar cost of the SLP node NODE and its children
1938    and return it.  Do not account defs that are marked in LIFE and
1939    update LIFE according to uses of NODE.  */
1940
1941 static unsigned
1942 vect_bb_slp_scalar_cost (basic_block bb,
1943                          slp_tree node, vec<bool, va_heap> *life)
1944 {
1945   unsigned scalar_cost = 0;
1946   unsigned i;
1947   gimple stmt;
1948   slp_tree child;
1949
1950   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1951     {
1952       unsigned stmt_cost;
1953       ssa_op_iter op_iter;
1954       def_operand_p def_p;
1955       stmt_vec_info stmt_info;
1956
1957       if ((*life)[i])
1958         continue;
1959
1960       /* If there is a non-vectorized use of the defs, then the scalar
1961          stmt is kept live, in which case we do not account it (or any
1962          required defs in the SLP children) in the scalar cost.  This
1963          way the vectorization is made to look more costly compared to
1964          the scalar cost.  */
1965       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
1966         {
1967           imm_use_iterator use_iter;
1968           gimple use_stmt;
1969           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
1970             if (gimple_code (use_stmt) == GIMPLE_PHI
1971                 || gimple_bb (use_stmt) != bb
1972                 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
1973               {
1974                 (*life)[i] = true;
1975                 BREAK_FROM_IMM_USE_STMT (use_iter);
1976               }
1977         }
1978       if ((*life)[i])
1979         continue;
1980
1981       stmt_info = vinfo_for_stmt (stmt);
1982       if (STMT_VINFO_DATA_REF (stmt_info))
1983         {
1984           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1985             stmt_cost = vect_get_stmt_cost (scalar_load);
1986           else
1987             stmt_cost = vect_get_stmt_cost (scalar_store);
1988         }
1989       else
1990         stmt_cost = vect_get_stmt_cost (scalar_stmt);
1991
1992       scalar_cost += stmt_cost;
1993     }
1994
1995   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1996     scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
1997
1998   return scalar_cost;
1999 }
2000
2001 /* Check if vectorization of the basic block is profitable.  */
2002
2003 static bool
2004 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2005 {
2006   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2007   slp_instance instance;
2008   int i, j;
2009   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2010   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2011   void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2012   stmt_vec_info stmt_info = NULL;
2013   stmt_vector_for_cost body_cost_vec;
2014   stmt_info_for_cost *ci;
2015
2016   /* Calculate vector costs.  */
2017   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2018     {
2019       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2020
2021       FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2022         {
2023           stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2024           (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2025                                 stmt_info, ci->misalign, vect_body);
2026         }
2027     }
2028
2029   /* Calculate scalar cost.  */
2030   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2031     {
2032       stack_vec<bool, 20> life;
2033       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2034       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2035                                               SLP_INSTANCE_TREE (instance),
2036                                               &life);
2037     }
2038
2039   /* Complete the target-specific cost calculation.  */
2040   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2041                &vec_inside_cost, &vec_epilogue_cost);
2042
2043   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2044
2045   if (dump_enabled_p ())
2046     {
2047       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis:\n");
2048       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2049                    vec_inside_cost);
2050       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2051       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2052       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2053     }
2054
2055   /* Vectorization is profitable if its cost is less than the cost of the
2056      scalar version.  */
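  /* For example (illustrative numbers only): with VEC_PROLOGUE_COST == 2,
     VEC_EPILOGUE_COST == 0, VEC_INSIDE_COST == 6 and SCALAR_COST == 8,
     2 + 0 + 6 >= 8 holds and the basic block is not vectorized.  */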
2057   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2058     return false;
2059
2060   return true;
2061 }
2062
2063 /* Check if the basic block can be vectorized.  */
2064
2065 static bb_vec_info
2066 vect_slp_analyze_bb_1 (basic_block bb)
2067 {
2068   bb_vec_info bb_vinfo;
2069   vec<slp_instance> slp_instances;
2070   slp_instance instance;
2071   int i;
2072   int min_vf = 2;
2073
2074   bb_vinfo = new_bb_vec_info (bb);
2075   if (!bb_vinfo)
2076     return NULL;
2077
2078   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2079     {
2080       if (dump_enabled_p ())
2081         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2082                          "not vectorized: unhandled data-ref in basic "
2083                          "block.\n");
2084
2085       destroy_bb_vec_info (bb_vinfo);
2086       return NULL;
2087     }
2088
2089   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2090     {
2091       if (dump_enabled_p ())
2092         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2093                          "not vectorized: not enough data-refs in "
2094                          "basic block.\n");
2095
2096       destroy_bb_vec_info (bb_vinfo);
2097       return NULL;
2098     }
2099
2100   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2101     {
2102      if (dump_enabled_p ())
2103        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2104                         "not vectorized: unhandled data access in "
2105                         "basic block.\n");
2106
2107       destroy_bb_vec_info (bb_vinfo);
2108       return NULL;
2109     }
2110
2111   vect_pattern_recog (NULL, bb_vinfo);
2112
2113   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2114      {
2115        if (dump_enabled_p ())
2116          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2117                           "not vectorized: unhandled data dependence "
2118                           "in basic block.\n");
2119
2120        destroy_bb_vec_info (bb_vinfo);
2121        return NULL;
2122      }
2123
2124   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2125     {
2126       if (dump_enabled_p ())
2127         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2128                          "not vectorized: bad data alignment in basic "
2129                          "block.\n");
2130
2131       destroy_bb_vec_info (bb_vinfo);
2132       return NULL;
2133     }
2134
2135   /* Check the SLP opportunities in the basic block, analyze and build SLP
2136      trees.  */
2137   if (!vect_analyze_slp (NULL, bb_vinfo))
2138     {
2139       if (dump_enabled_p ())
2140         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2141                          "not vectorized: failed to find SLP opportunities "
2142                          "in basic block.\n");
2143
2144       destroy_bb_vec_info (bb_vinfo);
2145       return NULL;
2146     }
2147
2148   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2149
2150   /* Mark all the statements that we want to vectorize as pure SLP and
2151      relevant.  */
2152   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2153     {
2154       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2155       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2156     }
2157
2158   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2159     {
2160       if (dump_enabled_p ())
2161         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2162                          "not vectorized: unsupported alignment in basic "
2163                          "block.\n");
2164       destroy_bb_vec_info (bb_vinfo);
2165       return NULL;
2166     }
2167
2168   if (!vect_slp_analyze_operations (bb_vinfo))
2169     {
2170       if (dump_enabled_p ())
2171         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2172                          "not vectorized: bad operation in basic block.\n");
2173
2174       destroy_bb_vec_info (bb_vinfo);
2175       return NULL;
2176     }
2177
2178   /* Cost model: check if the vectorization is worthwhile.  */
2179   if (!unlimited_cost_model ()
2180       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2181     {
2182       if (dump_enabled_p ())
2183         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2184                          "not vectorized: vectorization is not "
2185                          "profitable.\n");
2186
2187       destroy_bb_vec_info (bb_vinfo);
2188       return NULL;
2189     }
2190
2191   if (dump_enabled_p ())
2192     dump_printf_loc (MSG_NOTE, vect_location,
2193                      "Basic block will be vectorized using SLP\n");
2194
2195   return bb_vinfo;
2196 }
2197
2198
2199 bb_vec_info
2200 vect_slp_analyze_bb (basic_block bb)
2201 {
2202   bb_vec_info bb_vinfo;
2203   int insns = 0;
2204   gimple_stmt_iterator gsi;
2205   unsigned int vector_sizes;
2206
2207   if (dump_enabled_p ())
2208     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_slp_analyze_bb ===\n");
2209
2210   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2211     {
2212       gimple stmt = gsi_stmt (gsi);
2213       if (!is_gimple_debug (stmt)
2214           && !gimple_nop_p (stmt)
2215           && gimple_code (stmt) != GIMPLE_LABEL)
2216         insns++;
2217     }
2218
2219   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2220     {
2221       if (dump_enabled_p ())
2222         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2223                          "not vectorized: too many instructions in "
2224                          "basic block.\n");
2225
2226       return NULL;
2227     }
2228
2229   /* Autodetect first vector size we try.  */
2230   current_vector_size = 0;
2231   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2232
2233   while (1)
2234     {
2235       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2236       if (bb_vinfo)
2237         return bb_vinfo;
2238
2239       destroy_bb_vec_info (bb_vinfo);
2240
2241       vector_sizes &= ~current_vector_size;
2242       if (vector_sizes == 0
2243           || current_vector_size == 0)
2244         return NULL;
2245
2246       /* Try the largest remaining vector size.  */
2247       current_vector_size = 1 << floor_log2 (vector_sizes);
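      /* E.g., assuming a target advertising 32-byte and 16-byte vectors
         (VECTOR_SIZES == 32 | 16): if analysis at the autodetected 32-byte
         size fails, the 32 bit is cleared above and
         1 << floor_log2 (16) == 16 is tried next.  */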
2248       if (dump_enabled_p ())
2249         dump_printf_loc (MSG_NOTE, vect_location,
2250                          "***** Re-trying analysis with "
2251                          "vector size %d\n", current_vector_size);
2252     }
2253 }
2254
2255
2256 /* SLP costs are calculated according to the SLP instance unrolling factor
2257    (i.e., the number of created vector stmts depends on the unrolling
2258    factor).  However, the actual number of vector stmts for every SLP node
2259    depends on VF, which is set later in vect_analyze_operations ().  Hence,
2260    SLP costs should be updated.  In this function we assume that the inside
2261    costs calculated in vect_model_xxx_cost are linear in ncopies.  */
2262
2263 void
2264 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2265 {
2266   unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2267   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2268   slp_instance instance;
2269   stmt_vector_for_cost body_cost_vec;
2270   stmt_info_for_cost *si;
2271   void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2272
2273   if (dump_enabled_p ())
2274     dump_printf_loc (MSG_NOTE, vect_location,
2275                      "=== vect_update_slp_costs_according_to_vf ===\n");
2276
2277   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2278     {
2279       /* We assume that costs are linear in ncopies.  */
2280       int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
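      /* For instance (assumed values): with VF == 8 and an instance
         unrolling factor of 2, NCOPIES == 4 and each body cost entry is
         added below with its count multiplied by 4.  */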
2281
2282       /* Record the instance's instructions in the target cost model.
2283          This was delayed until here because the count of instructions
2284          isn't known beforehand.  */
2285       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2286
2287       FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2288         (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2289                               vinfo_for_stmt (si->stmt), si->misalign,
2290                               vect_body);
2291     }
2292 }
2293
2294
2295 /* For constant and loop invariant defs of SLP_NODE this function returns
2296    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2297    OP_NUM determines which operand of the RHS of the scalar stmts we gather
2298    defs for.  NUMBER_OF_VECTORS is the number of vector defs to create.
2299    REDUC_INDEX is the index of the reduction operand in the statements, unless
2300    it is -1.  */
2301
2302 static void
2303 vect_get_constant_vectors (tree op, slp_tree slp_node,
2304                            vec<tree> *vec_oprnds,
2305                            unsigned int op_num, unsigned int number_of_vectors,
2306                            int reduc_index)
2307 {
2308   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2309   gimple stmt = stmts[0];
2310   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2311   unsigned nunits;
2312   tree vec_cst;
2313   tree *elts;
2314   unsigned j, number_of_places_left_in_vector;
2315   tree vector_type;
2316   tree vop;
2317   int group_size = stmts.length ();
2318   unsigned int vec_num, i;
2319   unsigned number_of_copies = 1;
2320   vec<tree> voprnds;
2321   voprnds.create (number_of_vectors);
2322   bool constant_p, is_store;
2323   tree neutral_op = NULL;
2324   enum tree_code code = gimple_expr_code (stmt);
2325   gimple def_stmt;
2326   struct loop *loop;
2327   gimple_seq ctor_seq = NULL;
2328
2329   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2330       && reduc_index != -1)
2331     {
2332       op_num = reduc_index - 1;
2333       op = gimple_op (stmt, reduc_index);
2334       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2335          we need either neutral operands or the original operands.  See
2336          get_initial_def_for_reduction() for details.  */
2337       switch (code)
2338         {
2339           case WIDEN_SUM_EXPR:
2340           case DOT_PROD_EXPR:
2341           case PLUS_EXPR:
2342           case MINUS_EXPR:
2343           case BIT_IOR_EXPR:
2344           case BIT_XOR_EXPR:
2345              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2346                neutral_op = build_real (TREE_TYPE (op), dconst0);
2347              else
2348                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2349
2350              break;
2351
2352           case MULT_EXPR:
2353              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2354                neutral_op = build_real (TREE_TYPE (op), dconst1);
2355              else
2356                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2357
2358              break;
2359
2360           case BIT_AND_EXPR:
2361             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2362             break;
2363
2364           case MAX_EXPR:
2365           case MIN_EXPR:
2366             def_stmt = SSA_NAME_DEF_STMT (op);
2367             loop = (gimple_bb (stmt))->loop_father;
2368             neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2369                                                 loop_preheader_edge (loop));
2370             break;
2371
2372           default:
2373             neutral_op = NULL;
2374         }
2375     }
2376
2377   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2378     {
2379       is_store = true;
2380       op = gimple_assign_rhs1 (stmt);
2381     }
2382   else
2383     is_store = false;
2384
2385   gcc_assert (op);
2386
2387   if (CONSTANT_CLASS_P (op))
2388     constant_p = true;
2389   else
2390     constant_p = false;
2391
2392   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2393   gcc_assert (vector_type);
2394   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2395
2396   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2397      the created vectors.  It is greater than 1 if unrolling is performed.
2398
2399      For example, we have two scalar operands, s1 and s2 (e.g., group of
2400      strided accesses of size two), while NUNITS is four (i.e., four scalars
2401      of this type can be packed in a vector).  The output vector will contain
2402      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2403      will be 2).
2404
2405      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2406      containing the operands.
2407
2408      For example, NUNITS is four as before, and the group size is 8
2409      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2410      {s5, s6, s7, s8}.  */
2411
2412   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2413
2414   number_of_places_left_in_vector = nunits;
2415   elts = XALLOCAVEC (tree, nunits);
2416   for (j = 0; j < number_of_copies; j++)
2417     {
2418       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2419         {
2420           if (is_store)
2421             op = gimple_assign_rhs1 (stmt);
2422           else
2423             {
2424               switch (code)
2425                 {
2426                   case COND_EXPR:
2427                     if (op_num == 0 || op_num == 1)
2428                       {
2429                         tree cond = gimple_assign_rhs1 (stmt);
2430                         op = TREE_OPERAND (cond, op_num);
2431                       }
2432                     else
2433                       {
2434                         if (op_num == 2)
2435                           op = gimple_assign_rhs2 (stmt);
2436                         else
2437                           op = gimple_assign_rhs3 (stmt);
2438                       }
2439                     break;
2440
2441                   case CALL_EXPR:
2442                     op = gimple_call_arg (stmt, op_num);
2443                     break;
2444
2445                   case LSHIFT_EXPR:
2446                   case RSHIFT_EXPR:
2447                   case LROTATE_EXPR:
2448                   case RROTATE_EXPR:
2449                     op = gimple_op (stmt, op_num + 1);
2450                     /* Unlike the other binary operators, shifts/rotates have
2451                        the shift count being int, instead of the same type as
2452                        the lhs, so make sure the scalar is the right type if
2453                        we are dealing with vectors of
2454                        long long/long/short/char.  */
2455                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2456                       op = fold_convert (TREE_TYPE (vector_type), op);
2457                     break;
2458
2459                   default:
2460                     op = gimple_op (stmt, op_num + 1);
2461                     break;
2462                 }
2463             }
2464
2465           if (reduc_index != -1)
2466             {
2467               loop = (gimple_bb (stmt))->loop_father;
2468               def_stmt = SSA_NAME_DEF_STMT (op);
2469
2470               gcc_assert (loop);
2471
2472               /* Get the def before the loop.  In reduction chain we have only
2473                  one initial value.  */
2474               if ((j != (number_of_copies - 1)
2475                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2476                        && i != 0))
2477                   && neutral_op)
2478                 op = neutral_op;
2479               else
2480                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2481                                             loop_preheader_edge (loop));
2482             }
2483
2484           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2485           number_of_places_left_in_vector--;
2486           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2487             {
2488               if (CONSTANT_CLASS_P (op))
2489                 {
2490                   op = fold_unary (VIEW_CONVERT_EXPR,
2491                                    TREE_TYPE (vector_type), op);
2492                   gcc_assert (op && CONSTANT_CLASS_P (op));
2493                 }
2494               else
2495                 {
2496                   tree new_temp
2497                     = make_ssa_name (TREE_TYPE (vector_type), NULL);
2498                   gimple init_stmt;
2499                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2500                                op);
2501                   init_stmt
2502                     = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2503                                                     new_temp, op, NULL_TREE);
2504                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2505                   op = new_temp;
2506                 }
2507             }
2508           elts[number_of_places_left_in_vector] = op;
2509           if (!CONSTANT_CLASS_P (op))
2510             constant_p = false;
2511
2512           if (number_of_places_left_in_vector == 0)
2513             {
2514               number_of_places_left_in_vector = nunits;
2515
2516               if (constant_p)
2517                 vec_cst = build_vector (vector_type, elts);
2518               else
2519                 {
2520                   vec<constructor_elt, va_gc> *v;
2521                   unsigned k;
2522                   vec_alloc (v, nunits);
2523                   for (k = 0; k < nunits; ++k)
2524                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2525                   vec_cst = build_constructor (vector_type, v);
2526                 }
2527               voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2528                                                     vector_type, NULL));
2529               if (ctor_seq != NULL)
2530                 {
2531                   gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2532                   gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2533                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2534                                                         GSI_SAME_STMT);
2535                   ctor_seq = NULL;
2536                 }
2537             }
2538         }
2539     }
2540
2541   /* The vectors were created in reverse order, so push them into
2542      VEC_OPRNDS in the correct order here.  */
2543   vec_num = voprnds.length ();
2544   for (j = vec_num; j != 0; j--)
2545     {
2546       vop = voprnds[j - 1];
2547       vec_oprnds->quick_push (vop);
2548     }
2549
2550   voprnds.release ();
2551
2552   /* If VF is greater than the unrolling factor needed for the SLP group
2553      of stmts, the NUMBER_OF_VECTORS to be created is greater than
2554      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we
2555      have to replicate the vectors.  */
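  /* Illustrative case (assumed values): with NUNITS == 4, GROUP_SIZE == 2
     and NUMBER_OF_VECTORS == 4, only one vector {s1, s2, s1, s2} was built
     above, so it is pushed three more times (or a neutral vector is pushed
     instead, for reductions with a neutral operand).  */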
2556   while (number_of_vectors > vec_oprnds->length ())
2557     {
2558       tree neutral_vec = NULL;
2559
2560       if (neutral_op)
2561         {
2562           if (!neutral_vec)
2563             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2564
2565           vec_oprnds->quick_push (neutral_vec);
2566         }
2567       else
2568         {
2569           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2570             vec_oprnds->quick_push (vop);
2571         }
2572     }
2573 }
2574
2575
2576 /* Get the vectorized definitions from SLP_NODE, which contains the
2577    corresponding vectorized def-stmts.  */
2578
2579 static void
2580 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2581 {
2582   tree vec_oprnd;
2583   gimple vec_def_stmt;
2584   unsigned int i;
2585
2586   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2587
2588   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2589     {
2590       gcc_assert (vec_def_stmt);
2591       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2592       vec_oprnds->quick_push (vec_oprnd);
2593     }
2594 }
2595
2596
2597 /* Get vectorized definitions for SLP_NODE.
2598    If the scalar definitions are loop invariants or constants, collect them and
2599    call vect_get_constant_vectors() to create vector stmts.
2600    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2601    must be stored in the corresponding child of SLP_NODE, and we call
2602    vect_get_slp_vect_defs () to retrieve them.  */
2603
2604 void
2605 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2606                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2607 {
2608   gimple first_stmt;
2609   int number_of_vects = 0, i;
2610   unsigned int child_index = 0;
2611   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2612   slp_tree child = NULL;
2613   vec<tree> vec_defs;
2614   tree oprnd;
2615   bool vectorized_defs;
2616
2617   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2618   FOR_EACH_VEC_ELT (ops, i, oprnd)
2619     {
2620       /* For each operand we check if it has vectorized definitions in a child
2621          node or we need to create them (for invariants and constants).  We
2622          check if the LHS of the first stmt of the next child matches OPRND.
2623          If it does, we found the correct child.  Otherwise, we call
2624          vect_get_constant_vectors () and do not advance CHILD_INDEX, so
2625          that this child node is checked again for the next operand.  */
2626       vectorized_defs = false;
2627       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2628         {
2629           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2630
2631           /* We have to check both pattern and original def, if available.  */
2632           gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2633           gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2634
2635           if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2636               || (related
2637                   && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2638             {
2639               /* The number of vector defs is determined by the number of
2640                  vector statements in the node from which we get those
2641                  statements.  */
2642               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2643               vectorized_defs = true;
2644               child_index++;
2645             }
2646         }
2647
2648       if (!vectorized_defs)
2649         {
2650           if (i == 0)
2651             {
2652               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2653               /* Number of vector stmts was calculated according to LHS in
2654                  vect_schedule_slp_instance (), fix it by replacing LHS with
2655                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2656                  details.  */
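              /* E.g., for an assumed group of stores of chars computed
                 from ints (LHS_SIZE_UNIT == 1, RHS_SIZE_UNIT == 4), four
                 times as many vector defs are needed for the RHS as the
                 LHS-based count suggests.  */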
2657               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2658                                              &rhs_size_unit);
2659               if (rhs_size_unit != lhs_size_unit)
2660                 {
2661                   number_of_vects *= rhs_size_unit;
2662                   number_of_vects /= lhs_size_unit;
2663                 }
2664             }
2665         }
2666
2667       /* Allocate memory for vectorized defs.  */
2668       vec_defs = vNULL;
2669       vec_defs.create (number_of_vects);
2670
2671       /* For reduction defs we call vect_get_constant_vectors (), since we are
2672          looking for initial loop invariant values.  */
2673       if (vectorized_defs && reduc_index == -1)
2674         /* The defs are already vectorized.  */
2675         vect_get_slp_vect_defs (child, &vec_defs);
2676       else
2677         /* Build vectors from scalar defs.  */
2678         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2679                                    number_of_vects, reduc_index);
2680
2681       vec_oprnds->quick_push (vec_defs);
2682
2683       /* For reductions, we only need initial values.  */
2684       if (reduc_index != -1)
2685         return;
2686     }
2687 }
2688
2689
2690 /* Create NCOPIES permutation statements using the mask vector MASK and two
2691    input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
2692    for the first copy, shifting by STRIDE elements of DR_CHAIN for every
2693    following copy.
2694    (STRIDE is the number of vectorized stmts for NODE divided by the number
2695    of copies.)
2696    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
2697    where the created stmts must be inserted.  */
2698
2699 static inline void
2700 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2701                            tree mask, int first_vec_indx, int second_vec_indx,
2702                            gimple_stmt_iterator *gsi, slp_tree node,
2703                            tree vectype, vec<tree> dr_chain,
2704                            int ncopies, int vect_stmts_counter)
2705 {
2706   tree perm_dest;
2707   gimple perm_stmt = NULL;
2708   stmt_vec_info next_stmt_info;
2709   int i, stride;
2710   tree first_vec, second_vec, data_ref;
2711
2712   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
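  /* For illustration (assumed values): with 4 vector stmts for NODE and
     NCOPIES == 2, STRIDE == 2, so copy I uses the DR_CHAIN entries at
     FIRST_VEC_INDX + 2 * I and SECOND_VEC_INDX + 2 * I.  */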
2713
2714   /* Initialize the vect stmts of NODE to properly insert the generated
2715      stmts later.  */
2716   for (i = SLP_TREE_VEC_STMTS (node).length ();
2717        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2718     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2719
2720   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2721   for (i = 0; i < ncopies; i++)
2722     {
2723       first_vec = dr_chain[first_vec_indx];
2724       second_vec = dr_chain[second_vec_indx];
2725
2726       /* Generate the permute statement.  */
2727       perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2728                                                 first_vec, second_vec, mask);
2729       data_ref = make_ssa_name (perm_dest, perm_stmt);
2730       gimple_set_lhs (perm_stmt, data_ref);
2731       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2732
2733       /* Store the vector statement in NODE.  */
2734       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2735
2736       first_vec_indx += stride;
2737       second_vec_indx += stride;
2738     }
2739
2740   /* Mark the scalar stmt as vectorized.  */
2741   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2742   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2743 }
2744
2745
2746 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2747    return in CURRENT_MASK_ELEMENT its equivalent in target specific
2748    representation.  Check that the mask is valid and return FALSE if not.
2749    Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2750    the next vector, i.e., the current first vector is not needed.  */
2751
2752 static bool
2753 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2754                        int mask_nunits, bool only_one_vec, int index,
2755                        unsigned char *mask, int *current_mask_element,
2756                        bool *need_next_vector, int *number_of_mask_fixes,
2757                        bool *mask_fixed, bool *needs_first_vector)
2758 {
2759   int i;
2760
2761   /* Convert to target specific representation.  */
2762   *current_mask_element = first_mask_element + m;
2763   /* Adjust the value in case it's a mask for second and third vectors.  */
2764   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
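  /* Connecting this to the example in vect_transform_slp_perm_load: the
     third mask there starts as {6,9,9,9}; with MASK_NUNITS == 4 the
     elements are shifted down by one vector (minus 4) here and in the
     fix-up loop below, yielding the valid two-operand mask {2,5,5,5}.  */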
2765
2766   if (*current_mask_element < mask_nunits)
2767     *needs_first_vector = true;
2768
2769   /* We have only one input vector to permute but the mask accesses values in
2770      the next vector as well.  */
2771   if (only_one_vec && *current_mask_element >= mask_nunits)
2772     {
2773       if (dump_enabled_p ())
2774         {
2775           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2776                            "permutation requires at least two vectors ");
2777           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2778           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2779         }
2780
2781       return false;
2782     }
2783
2784   /* The mask requires the next vector.  */
2785   if (*current_mask_element >= mask_nunits * 2)
2786     {
2787       if (*needs_first_vector || *mask_fixed)
2788         {
2789           /* We either need the first vector too or have already moved to the
2790              next vector. In both cases, this permutation needs three
2791              vectors.  */
2792           if (dump_enabled_p ())
2793             {
2794               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2795                                "permutation requires at "
2796                                "least three vectors ");
2797               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2798               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2799             }
2800
2801           return false;
2802         }
2803
2804       /* We move to the next vector, dropping the first one and working with
2805          the second and the third - we need to adjust the values of the mask
2806          accordingly.  */
2807       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2808
2809       for (i = 0; i < index; i++)
2810         mask[i] -= mask_nunits * *number_of_mask_fixes;
2811
2812       (*number_of_mask_fixes)++;
2813       *mask_fixed = true;
2814     }
2815
2816   *need_next_vector = *mask_fixed;
2817
2818   /* This was the last element of this mask. Start a new one.  */
2819   if (index == mask_nunits - 1)
2820     {
2821       *number_of_mask_fixes = 1;
2822       *mask_fixed = false;
2823       *needs_first_vector = false;
2824     }
2825
2826   return true;
2827 }
2828
2829
2830 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2831    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2832    permute statements for the SLP node NODE of the SLP instance
2833    SLP_NODE_INSTANCE.  */
2834
2835 bool
2836 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2837                               gimple_stmt_iterator *gsi, int vf,
2838                               slp_instance slp_node_instance, bool analyze_only)
2839 {
2840   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2841   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2842   tree mask_element_type = NULL_TREE, mask_type;
2843   int i, j, k, nunits, vec_index = 0, scalar_index;
2844   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2845   gimple next_scalar_stmt;
2846   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2847   int first_mask_element;
2848   int index, unroll_factor, current_mask_element, ncopies;
2849   unsigned char *mask;
2850   bool only_one_vec = false, need_next_vector = false;
2851   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2852   int number_of_mask_fixes = 1;
2853   bool mask_fixed = false;
2854   bool needs_first_vector = false;
2855   enum machine_mode mode;
2856
2857   mode = TYPE_MODE (vectype);
2858
2859   if (!can_vec_perm_p (mode, false, NULL))
2860     {
2861       if (dump_enabled_p ())
2862         {
2863           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2864                            "no vect permute for ");
2865           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2866           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2867         }
2868       return false;
2869     }
2870
2871   /* The generic VEC_PERM_EXPR code always uses an integral type of the
2872      same size as the vector element being permuted.  */
2873   mask_element_type = lang_hooks.types.type_for_mode
2874                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2875   mask_type = get_vectype_for_scalar_type (mask_element_type);
2876   nunits = TYPE_VECTOR_SUBPARTS (vectype);
2877   mask = XALLOCAVEC (unsigned char, nunits);
2878   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2879
2880   /* The number of vector stmts to generate based only on the
2881      SLP_NODE_INSTANCE unrolling factor.  */
2882   orig_vec_stmts_num
2883     = group_size * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2884   if (orig_vec_stmts_num == 1)
2885     only_one_vec = true;
2886
2887   /* The number of copies is determined by the final vectorization factor
2888      relative to the SLP_NODE_INSTANCE unrolling factor.  */
2889   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
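  /* E.g. (assumed values): GROUP_SIZE == 3, an unrolling factor of 4 and
     NUNITS == 4 give ORIG_VEC_STMTS_NUM == 3 * 4 / 4 == 3, and a final
     VF == 8 gives NCOPIES == 8 / 4 == 2.  */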
2890
2891   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2892     return false;
2893
2894   /* Generate permutation masks for every NODE.  The number of masks for
2895      each NODE is equal to GROUP_SIZE.
2896      E.g., we have a group of three nodes with three loads from the same
2897      location in each node, and the vector size is 4.  I.e., we have an
2898      a0b0c0a1b1c1... sequence and we need to create the following vectors:
2899      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2900      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2901      ...
2902
2903      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2904      The last mask is illegal since we assume two operands for the permute
2905      operation, so mask element values cannot exceed 2 * NUNITS - 1.
2906      Hence, the last mask must be converted into {2,5,5,5}.
2907      For the first two permutations we need the first and the second input
2908      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2909      we need the second and the third vectors: {b1,c1,a2,b2} and
2910      {c2,a3,b3,c3}.  */
2911
2912     {
2913       scalar_index = 0;
2914       index = 0;
2915       vect_stmts_counter = 0;
2916       vec_index = 0;
2917       first_vec_index = vec_index++;
2918       if (only_one_vec)
2919         second_vec_index = first_vec_index;
2920       else
2921         second_vec_index = vec_index++;
2922
2923       for (j = 0; j < unroll_factor; j++)
2924         {
2925           for (k = 0; k < group_size; k++)
2926             {
2927               i = SLP_TREE_LOAD_PERMUTATION (node)[k];
2928               first_mask_element = i + j * group_size;
2929               if (!vect_get_mask_element (stmt, first_mask_element, 0,
2930                                           nunits, only_one_vec, index,
2931                                           mask, &current_mask_element,
2932                                           &need_next_vector,
2933                                           &number_of_mask_fixes, &mask_fixed,
2934                                           &needs_first_vector))
2935                 return false;
2936               mask[index++] = current_mask_element;
2937
2938               if (index == nunits)
2939                 {
2940                   index = 0;
2941                   if (!can_vec_perm_p (mode, false, mask))
2942                     {
2943                       if (dump_enabled_p ())
2944                         {
2945                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2946                                            vect_location,
2947                                            "unsupported vect permute { ");
2948                           for (i = 0; i < nunits; ++i)
2949                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2950                                          mask[i]);
2951                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2952                         }
2953                       return false;
2954                     }
2955
2956                   if (!analyze_only)
2957                     {
2958                       int l;
2959                       tree mask_vec, *mask_elts;
2960                       mask_elts = XALLOCAVEC (tree, nunits);
2961                       for (l = 0; l < nunits; ++l)
2962                         mask_elts[l] = build_int_cst (mask_element_type,
2963                                                       mask[l]);
2964                       mask_vec = build_vector (mask_type, mask_elts);
2965
2966                       if (need_next_vector)
2967                         {
2968                           first_vec_index = second_vec_index;
2969                           second_vec_index = vec_index;
2970                         }
2971
2972                       next_scalar_stmt
2973                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2974
2975                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
2976                                mask_vec, first_vec_index, second_vec_index,
2977                                gsi, node, vectype, dr_chain,
2978                                ncopies, vect_stmts_counter++);
2979                     }
2980                 }
2981             }
2982         }
2983     }
2984
2985   return true;
2986 }
2987
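/* Illustrative sketch, added for exposition (hypothetical helper, not part
   of the original file): a simplified model of the mask renormalization
   described in the comment above.  Once no element of a mask refers to the
   current first input vector, the input window advances and NUNITS is
   subtracted from every element so that the mask again indexes the
   two-operand range [0, 2 * NUNITS).  The real bookkeeping lives in
   vect_get_mask_element.  */

static void
example_renormalize_mask (unsigned char *mask, unsigned int nunits)
{
  unsigned int i, low = mask[0];

  /* Find the smallest vector element referenced by the mask.  */
  for (i = 1; i < nunits; i++)
    if (mask[i] < low)
      low = mask[i];

  /* Advance the window only when the first input vector is unused;
     e.g. with nunits == 4, {6,9,9,9} becomes {2,5,5,5}, while
     {3,3,6,6} is left alone because element 3 still lives in the
     first vector.  */
  if (low >= nunits)
    for (i = 0; i < nunits; i++)
      mask[i] -= nunits;
}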
2988
2989
2990 /* Vectorize SLP instance tree in postorder.  */
2991
2992 static bool
2993 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
2994                             unsigned int vectorization_factor)
2995 {
2996   gimple stmt;
2997   bool grouped_store, is_store;
2998   gimple_stmt_iterator si;
2999   stmt_vec_info stmt_info;
3000   unsigned int vec_stmts_size, nunits, group_size;
3001   tree vectype;
3002   int i;
3003   slp_tree child;
3004
3005   if (!node)
3006     return false;
3007
3008   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3009     vect_schedule_slp_instance (child, instance, vectorization_factor);
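  /* The children are scheduled first so that the vectorized defs of this
     node's operands already exist when STMT itself is transformed.  */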
3010
3011   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3012   stmt_info = vinfo_for_stmt (stmt);
3013
3014   /* VECTYPE is the type of the destination.  */
3015   vectype = STMT_VINFO_VECTYPE (stmt_info);
3016   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3017   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3018
3019   /* For each SLP instance calculate the number of vector stmts to be
3020      created for the scalar stmts in each node of the SLP tree.  One
3021      vectorized iteration covers GROUP_SIZE * VF scalar elements, so
3022      dividing that by the number of elements per vector (NUNITS) gives
3023      the number of vector stmts.  */
3024   vec_stmts_size = (vectorization_factor * group_size) / nunits;
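  /* E.g., GROUP_SIZE == 4, VF == 2 and NUNITS == 4 give (2 * 4) / 4 == 2
     vector stmts for this node.  */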
3025
3026   if (!SLP_TREE_VEC_STMTS (node).exists ())
3027     {
3028       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3029       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3030     }
3031
3032   if (dump_enabled_p ())
3033     {
3034       dump_printf_loc (MSG_NOTE, vect_location,
3035                        "------>vectorizing SLP node starting from: ");
3036       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3037       dump_printf (MSG_NOTE, "\n");
3038     }
3039
3040   /* Vectorized loads are inserted before the first scalar load.  */
3041   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3042       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3043       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3044       && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3045     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3046   else if (is_pattern_stmt_p (stmt_info))
3047     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3048   else
3049     si = gsi_for_stmt (stmt);
3050
3051   /* Vectorized stores are inserted just before the last scalar store.  */
3052   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3053       && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3054     {
3055       gimple last_store = vect_find_last_store_in_slp_instance (instance);
3056       if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3057         last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3058       si = gsi_for_stmt (last_store);
3059     }
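  /* Emitting at the last scalar store ensures that every value stored by
     the group has already been computed at the insertion point.  */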
3060
3061   /* Mark the first element of the reduction chain as a reduction to
3062      properly transform the node.  In the analysis phase only the last
3063      element of the chain is marked as a reduction.  */
3064   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3065       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3066     {
3067       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3068       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3069     }
3070
3071   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3072   return is_store;
3073 }
3074
3075 /* Replace the scalar calls in SLP node NODE by setting their lhs to zero.
3076    For loop vectorization this is done in vectorizable_call, but for SLP
3077    it needs to be deferred until the end of vect_schedule_slp, because
3078    multiple SLP instances may refer to the same scalar stmt.  */
3079
3080 static void
3081 vect_remove_slp_scalar_calls (slp_tree node)
3082 {
3083   gimple stmt, new_stmt;
3084   gimple_stmt_iterator gsi;
3085   int i;
3086   slp_tree child;
3087   tree lhs;
3088   stmt_vec_info stmt_info;
3089
3090   if (!node)
3091     return;
3092
3093   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3094     vect_remove_slp_scalar_calls (child);
3095
3096   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3097     {
3098       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3099         continue;
3100       stmt_info = vinfo_for_stmt (stmt);
3101       if (stmt_info == NULL
3102           || is_pattern_stmt_p (stmt_info)
3103           || !PURE_SLP_STMT (stmt_info))
3104         continue;
3105       lhs = gimple_call_lhs (stmt);
3106       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3107       set_vinfo_for_stmt (new_stmt, stmt_info);
3108       set_vinfo_for_stmt (stmt, NULL);
3109       STMT_VINFO_STMT (stmt_info) = new_stmt;
3110       gsi = gsi_for_stmt (stmt);
3111       gsi_replace (&gsi, new_stmt, false);
3112       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3113     }
3114 }
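/* For exposition (hypothetical GIMPLE, not from the original file): a
   pure-SLP scalar call such as

       x_1 = __builtin_fmaf (a_2, b_3, c_4);

   is rewritten by the function above into

       x_1 = 0.0;

   so the SSA name x_1 keeps a valid definition while its stmt_vec_info
   is carried over to the replacement assignment.  */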
3115
3116 /* Generate vector code for all SLP instances in the loop/basic block.  */
3117
3118 bool
3119 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3120 {
3121   vec<slp_instance> slp_instances;
3122   slp_instance instance;
3123   unsigned int i, vf;
3124   bool is_store = false;
3125
3126   if (loop_vinfo)
3127     {
3128       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3129       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3130     }
3131   else
3132     {
3133       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3134       vf = 1;
3135     }
3136
3137   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3138     {
3139       /* Schedule the tree of INSTANCE.  */
3140       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3141                                              instance, vf);
3142       if (dump_enabled_p ())
3143         dump_printf_loc (MSG_NOTE, vect_location,
3144                          "vectorizing stmts using SLP.\n");
3145     }
3146
3147   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3148     {
3149       slp_tree root = SLP_INSTANCE_TREE (instance);
3150       gimple store;
3151       unsigned int j;
3152       gimple_stmt_iterator gsi;
3153
3154       /* Remove scalar call stmts.  Do not do this for basic-block
3155          vectorization as not all uses may be vectorized.
3156          ???  Why should this be necessary?  DCE should be able to
3157          remove the stmts itself.
3158          ???  For BB vectorization we can as well remove scalar
3159          stmts starting from the SLP tree root if they have no
3160          uses.  */
3161       if (loop_vinfo)
3162         vect_remove_slp_scalar_calls (root);
3163
3164       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3165                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3166         {
3167           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3168             break;
3169
3170           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3171             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3172           /* Free the attached stmt_vec_info and remove the stmt.  */
3173           gsi = gsi_for_stmt (store);
3174           unlink_stmt_vdef (store);
3175           gsi_remove (&gsi, true);
3176           release_defs (store);
3177           free_stmt_vec_info (store);
3178         }
3179     }
3180
3181   return is_store;
3182 }
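
/* Usage note, added for exposition: exactly one of LOOP_VINFO and
   BB_VINFO is expected to be non-NULL.  Loop vectorization calls
   vect_schedule_slp (loop_vinfo, NULL); basic-block vectorization calls
   vect_schedule_slp (NULL, bb_vinfo), as vect_slp_transform_bb below
   does.  */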
3183
3184
3185 /* Vectorize the basic block.  */
3186
3187 void
3188 vect_slp_transform_bb (basic_block bb)
3189 {
3190   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3191   gimple_stmt_iterator si;
3192
3193   gcc_assert (bb_vinfo);
3194
3195   if (dump_enabled_p ())
3196     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3197
3198   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3199     {
3200       gimple stmt = gsi_stmt (si);
3201       stmt_vec_info stmt_info;
3202
3203       if (dump_enabled_p ())
3204         {
3205           dump_printf_loc (MSG_NOTE, vect_location,
3206                            "------>SLPing statement: ");
3207           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3208           dump_printf (MSG_NOTE, "\n");
3209         }
3210
3211       stmt_info = vinfo_for_stmt (stmt);
3212       gcc_assert (stmt_info);
3213
3214       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3215       if (STMT_SLP_TYPE (stmt_info))
3216         {
3217           vect_schedule_slp (NULL, bb_vinfo);
3218           break;
3219         }
3220     }
3221
3222   if (dump_enabled_p ())
3223     dump_printf_loc (MSG_NOTE, vect_location,
3224                      "BASIC BLOCK VECTORIZED\n");
3225
3226   destroy_bb_vec_info (bb_vinfo);
3227 }