re PR tree-optimization/59374 (-ftree-slp-vectorize breaks unique_ptr's move constructor)
gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the location of the first statement that has one, and
   UNKNOWN_LOCATION otherwise.  */

source_location
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
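
/* As an illustrative sketch (a hypothetical group, not taken from any
   testcase): for an interleaving chain of stores rooted at a[0],

     a[0] = x;   <-- FIRST_STMT (GROUP_FIRST_ELEMENT)
     a[1] = y;
     a[2] = z;

   calling this function on the store to a[2] walks GROUP_NEXT_ELEMENT
   twice and returns 2, while a store from an unrelated chain fails the
   GROUP_FIRST_ELEMENT check above and yields -1.  */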


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */
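
/* As a hedged example of what "matching defs" means here (hypothetical
   statements, not from a testcase): in the group

     a[0] = _1 + 5;
     a[1] = _2 + b;

   operand zero of both adds is an internal def, which matches; operand
   one mixes a constant with an external def, a combination the check
   below deliberately accepts since both become invariant vector
   operands.  Mixing, say, an internal def with a constant instead would
   fail with "Build SLP failed: different types".  */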

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt:\n");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return false;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return false;
        }
    }

  return true;
}


/* Verify that the scalar stmts STMTS are isomorphic, do not require any
   data permutation and are not of unsupported operation types.  Return
   true if so; otherwise return false and indicate in *MATCHES which
   stmts are not isomorphic to the first one.  If MATCHES[0] is false
   then this indicates that the comparison could not be carried out or
   that the stmts will never be vectorized by SLP.  */
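
/* For instance (an illustrative pair of stmts, not from a testcase),

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   are isomorphic: the same PLUS_EXPR operation on compatible types.
   Were the second stmt a subtraction instead, MATCHES[1] would end up
   false while MATCHES[0] stayed true, which lets the caller retry with
   commuted operands where that is possible.  */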

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       vec<gimple> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

       if (is_gimple_assign (stmt)
           && gimple_assign_rhs_code (stmt) == COND_EXPR
           && (cond = gimple_assign_rhs1 (stmt))
           && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                }
           }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise
   return TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes)
{
  unsigned nops, i, this_npermutes = 0;
  gimple stmt;

  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                        stmt, (i == 0), &oprnds_info))
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }
    }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches, npermutes))
        {
          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build for operand zero failed and operands zero
         and one can be commuted, try that for the scalar stmts
         that failed the match.  */
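      /* A sketch of the situation (hypothetical stmts):

           a[0] = b[0] + c[0];
           a[1] = c[1] + b[1];

         Operand zero {b[0], c[1]} does not form a consistent group, but
         PLUS_EXPR is commutative, so swapping the operands of the second
         stmt gives the groups {b[0], b[1]} and {c[0], c[1]}, which do.  */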
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          /* Do so only if the number of unsuccessful permutes was not
             more than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          /* Swap mismatched definition stmts.  */
          for (unsigned j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                gimple tem = oprnds_info[0]->def_stmts[j];
                oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
                oprnds_info[1]->def_stmts[j] = tem;
              }
          /* And try again ... */
          if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   matches, npermutes))
            {
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
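
/* E.g., with GROUP_SIZE == 4 and PERMUTATION == {2, 0, 3, 1} (made-up
   values), the stmt at index 0 moves to slot 2, index 1 to slot 0 and
   so on, since the loop above stores stmt I into
   tmp_stmts[PERMUTATION[I]].  */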


/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  sbitmap load_index;
  slp_tree node;
  gimple stmt, load, next_load, first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (i = 0; i < group_size; ++i)
            dump_printf (MSG_NOTE, "%d ", i);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      slp_tree load;
      unsigned int lidx;

      /* Compare all the permutation sequences to the first one.  We know
         that at least one load is permuted.  */
      node = SLP_INSTANCE_LOADS (slp_instn)[0];
      if (!node->load_permutation.exists ())
        return false;
      for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
        {
          if (!load->load_permutation.exists ())
            return false;
          FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
            if (lidx != node->load_permutation[j])
              return false;
        }

      /* Check that the loads in the first sequence are different and there
         are no gaps between them.  */
      load_index = sbitmap_alloc (group_size);
      bitmap_clear (load_index);
      FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
        {
          if (bitmap_bit_p (load_index, lidx))
            {
              sbitmap_free (load_index);
              return false;
            }
          bitmap_set_bit (load_index, lidx);
        }
      for (i = 0; i < group_size; i++)
        if (!bitmap_bit_p (load_index, i))
          {
            sbitmap_free (load_index);
            return false;
          }
      sbitmap_free (load_index);

      /* This permutation is valid for reduction.  Since the order of the
         statements in the nodes is not important unless they are memory
         accesses, we can rearrange the statements in all the nodes
         according to the order of the loads.  */
      vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                node->load_permutation);

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check that for every node in the instance the loads
         form a subchain.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0 && next_load != load)
                return false;
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      /* We are done, no actual permutations need to be generated.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      return true;
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
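  /* For GROUP_SIZE == 2 that shape would be, e.g., the permutation
     {0, 0} in one load node and {1, 1} in the other (an illustrative
     instance): each node loads one element of the chain GROUP_SIZE
     times, and together the nodes cover the whole chain.  */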
  if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
    return false;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (!node->load_permutation.exists ())
      return false;

  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      unsigned int lidx = node->load_permutation[0];
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
      FOR_EACH_VEC_ELT (node->load_permutation, j, k)
        if (k != lidx)
          {
            sbitmap_free (load_index);
            return false;
          }
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;
  return true;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ???  Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
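  /* E.g., for NUNITS == 4 and GROUP_SIZE == 6 (illustrative values),
     least_common_multiple (4, 6) is 12 and ncopies_for_cost is 3.  */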
1409
1410   prologue_cost_vec.create (10);
1411   body_cost_vec.create (10);
1412   SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1413   vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1414                            instance, SLP_INSTANCE_TREE (instance),
1415                            &prologue_cost_vec, ncopies_for_cost);
1416
1417   /* Record the prologue costs, which were delayed until we were
1418      sure that SLP was successful.  Unlike the body costs, we know
1419      the final values now regardless of the loop vectorization factor.  */
1420   void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1421                 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1422   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1423     {
1424       struct _stmt_vec_info *stmt_info
1425         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1426       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1427                             si->misalign, vect_prologue);
1428     }
1429
1430   prologue_cost_vec.release ();
1431 }
1432
1433 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1434    vect_build_slp_tree to build a tree of packed stmts if possible.
1435    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1436
1437 static bool
1438 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1439                            gimple stmt)
1440 {
1441   slp_instance new_instance;
1442   slp_tree node;
1443   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1444   unsigned int unrolling_factor = 1, nunits;
1445   tree vectype, scalar_type = NULL_TREE;
1446   gimple next;
1447   unsigned int vectorization_factor = 0;
1448   int i;
1449   unsigned int max_nunits = 0;
1450   vec<slp_tree> loads;
1451   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1452   vec<gimple> scalar_stmts;
1453
1454   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1455     {
1456       if (dr)
1457         {
1458           scalar_type = TREE_TYPE (DR_REF (dr));
1459           vectype = get_vectype_for_scalar_type (scalar_type);
1460         }
1461       else
1462         {
1463           gcc_assert (loop_vinfo);
1464           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1465         }
1466
1467       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1468     }
1469   else
1470     {
1471       gcc_assert (loop_vinfo);
1472       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1473       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1474     }
1475
1476   if (!vectype)
1477     {
1478       if (dump_enabled_p ())
1479         {
1480           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1481                            "Build SLP failed: unsupported data-type ");
1482           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1483           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1484         }
1485
1486       return false;
1487     }
1488
1489   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1490   if (loop_vinfo)
1491     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1492   else
1493     vectorization_factor = nunits;
1494
1495   /* Calculate the unrolling factor.  */
1496   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
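  /* E.g. (assumed numbers): with NUNITS == 4 and GROUP_SIZE == 2 the group
     must be unrolled lcm (4, 2) / 2 == 2 times to fill a vector, whereas
     with GROUP_SIZE == 8 no unrolling is needed: lcm (4, 8) / 8 == 1.  */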
1497   if (unrolling_factor != 1 && !loop_vinfo)
1498     {
1499       if (dump_enabled_p ())
1500         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1501                          "Build SLP failed: unrolling required in basic"
1502                          " block SLP\n");
1503
1504       return false;
1505     }
1506
1507   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1508   scalar_stmts.create (group_size);
1509   next = stmt;
1510   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1511     {
1512       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1513       while (next)
1514         {
1515           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1516               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1517             scalar_stmts.safe_push (
1518                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1519           else
1520             scalar_stmts.safe_push (next);
1521           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1522         }
1523     }
1524   else
1525     {
1526       /* Collect reduction statements.  */
1527       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1528       for (i = 0; reductions.iterate (i, &next); i++)
1529         scalar_stmts.safe_push (next);
1530     }
1531
1532   node = vect_create_new_slp_node (scalar_stmts);
1533
1534   loads.create (group_size);
1535
1536   /* Build the tree for the SLP instance.  */
1537   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1538                            &max_nunits, &loads,
1539                            vectorization_factor, NULL, NULL))
1540     {
1541       /* Calculate the unrolling factor based on the smallest type.  */
1542       if (max_nunits > nunits)
1543         unrolling_factor = least_common_multiple (max_nunits, group_size)
1544                            / group_size;
1545
1546       if (unrolling_factor != 1 && !loop_vinfo)
1547         {
1548           if (dump_enabled_p ())
1549             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1550                              "Build SLP failed: unrolling required in basic"
1551                              " block SLP\n");
1552           vect_free_slp_tree (node);
1553           loads.release ();
1554           return false;
1555         }
1556
1557       /* Create a new SLP instance.  */
1558       new_instance = XNEW (struct _slp_instance);
1559       SLP_INSTANCE_TREE (new_instance) = node;
1560       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1561       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1562       SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1563       SLP_INSTANCE_LOADS (new_instance) = loads;
1564       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1565
1566       /* Compute the load permutation.  */
1567       slp_tree load_node;
1568       bool loads_permuted = false;
1569       FOR_EACH_VEC_ELT (loads, i, load_node)
1570         {
1571           vec<unsigned> load_permutation;
1572           int j;
1573           gimple load, first_stmt;
1574           bool this_load_permuted = false;
1575           load_permutation.create (group_size);
1576           first_stmt = GROUP_FIRST_ELEMENT
1577               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1578           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1579             {
1580               int load_place
1581                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1582               gcc_assert (load_place != -1);
1583               if (load_place != j)
1584                 this_load_permuted = true;
1585               load_permutation.safe_push (load_place);
1586             }
1587           if (!this_load_permuted)
1588             {
1589               load_permutation.release ();
1590               continue;
1591             }
1592           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1593           loads_permuted = true;
1594         }
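
      /* A hypothetical example: if the loads in LOAD_NODE appear in the
         order a[1], a[0], a[3], a[2] while the interleaving chain starts
         at a[0], the load permutation computed above is {1, 0, 3, 2} and
         LOADS_PERMUTED is set.  */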
1595
1596       if (loads_permuted)
1597         {
1598           if (!vect_supported_load_permutation_p (new_instance))
1599             {
1600               if (dump_enabled_p ())
1601                 {
1602                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1603                                    "Build SLP failed: unsupported load "
1604                                    "permutation ");
1605                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1606                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1607                 }
1608               vect_free_slp_instance (new_instance);
1609               return false;
1610             }
1611
1612           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1613             = vect_find_first_load_in_slp_instance (new_instance);
1614         }
1615
1616       /* Compute the costs of this SLP instance.  */
1617       vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1618                              new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1619
1620       if (loop_vinfo)
1621         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1622       else
1623         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1624
1625       if (dump_enabled_p ())
1626         vect_print_slp_tree (MSG_NOTE, node);
1627
1628       return true;
1629     }
1630
1631   /* Failed to SLP.  */
1632   /* Free the allocated memory.  */
1633   vect_free_slp_tree (node);
1634   loads.release ();
1635
1636   return false;
1637 }
1638
1639
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */
1642
1643 bool
1644 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1645 {
1646   unsigned int i;
1647   vec<gimple> grouped_stores;
1648   vec<gimple> reductions = vNULL;
1649   vec<gimple> reduc_chains = vNULL;
1650   gimple first_element;
1651   bool ok = false;
1652
1653   if (dump_enabled_p ())
1654     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1655
1656   if (loop_vinfo)
1657     {
1658       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1659       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1660       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1661     }
1662   else
1663     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1664
1665   /* Find SLP sequences starting from groups of grouped stores.  */
1666   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1667     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1668       ok = true;
1669
1670   if (bb_vinfo && !ok)
1671     {
1672       if (dump_enabled_p ())
1673         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1674                          "Failed to SLP the basic block.\n");
1675
1676       return false;
1677     }
1678
1679   if (loop_vinfo
1680       && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1681     {
1682       /* Find SLP sequences starting from reduction chains.  */
1683       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1684         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1685           ok = true;
1686         else
1687           return false;
1688
      /* Don't try to vectorize SLP reductions if a reduction chain was
         detected.  */
1691       return ok;
1692     }
1693
1694   /* Find SLP sequences starting from groups of reductions.  */
1695   if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1696       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1697     ok = true;
1698
1699   return true;
1700 }
1701
1702
/* For each possible SLP instance decide whether to SLP it and calculate the
   overall unrolling factor needed to SLP the loop.  Return TRUE if we
   decided to SLP at least one instance.  */
1706
1707 bool
1708 vect_make_slp_decision (loop_vec_info loop_vinfo)
1709 {
1710   unsigned int i, unrolling_factor = 1;
1711   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1712   slp_instance instance;
1713   int decided_to_slp = 0;
1714
1715   if (dump_enabled_p ())
1716     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1717                      "\n");
1718
1719   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1720     {
1721       /* FORNOW: SLP if you can.  */
1722       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1723         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1724
1725       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1726          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1727          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1728       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1729       decided_to_slp++;
1730     }
1731
1732   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1733
1734   if (decided_to_slp && dump_enabled_p ())
1735     dump_printf_loc (MSG_NOTE, vect_location,
1736                      "Decided to SLP %d instances. Unrolling factor %d\n",
1737                      decided_to_slp, unrolling_factor);
1738
1739   return (decided_to_slp > 0);
1740 }
1741
1742
1743 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1744    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
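
/* For example: a stmt that belongs to an SLP instance but whose scalar
   result is also used by a relevant stmt outside the SLP instances must be
   vectorized by loop-based vectorization as well, so it is marked HYBRID
   rather than pure SLP.  */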
1745
1746 static void
1747 vect_detect_hybrid_slp_stmts (slp_tree node)
1748 {
1749   int i;
1750   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1751   gimple stmt = stmts[0];
1752   imm_use_iterator imm_iter;
1753   gimple use_stmt;
1754   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1755   slp_tree child;
1756   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1757   struct loop *loop = NULL;
1758   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1759   basic_block bb = NULL;
1760
1761   if (!node)
1762     return;
1763
1764   if (loop_vinfo)
1765     loop = LOOP_VINFO_LOOP (loop_vinfo);
1766   else
1767     bb = BB_VINFO_BB (bb_vinfo);
1768
1769   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1770     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1771         && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1772       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1773         if (gimple_bb (use_stmt)
1774             && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1775                  || bb == gimple_bb (use_stmt))
1776             && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1777             && !STMT_SLP_TYPE (stmt_vinfo)
1778             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1779                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1780             && !(gimple_code (use_stmt) == GIMPLE_PHI
1781                  && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1782                   == vect_reduction_def))
1783           vect_mark_slp_stmts (node, hybrid, i);
1784
1785   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1786     vect_detect_hybrid_slp_stmts (child);
1787 }
1788
1789
1790 /* Find stmts that must be both vectorized and SLPed.  */
1791
1792 void
1793 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1794 {
1795   unsigned int i;
1796   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1797   slp_instance instance;
1798
1799   if (dump_enabled_p ())
1800     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1801                      "\n");
1802
1803   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1804     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1805 }
1806
1807
1808 /* Create and initialize a new bb_vec_info struct for BB, as well as
1809    stmt_vec_info structs for all the stmts in it.  */
1810
1811 static bb_vec_info
1812 new_bb_vec_info (basic_block bb)
1813 {
1814   bb_vec_info res = NULL;
1815   gimple_stmt_iterator gsi;
1816
1817   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1818   BB_VINFO_BB (res) = bb;
1819
1820   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1821     {
1822       gimple stmt = gsi_stmt (gsi);
1823       gimple_set_uid (stmt, 0);
1824       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1825     }
1826
1827   BB_VINFO_GROUPED_STORES (res).create (10);
1828   BB_VINFO_SLP_INSTANCES (res).create (2);
1829   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1830
1831   bb->aux = res;
1832   return res;
1833 }
1834
1835
1836 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1837    stmts in the basic block.  */
1838
1839 static void
1840 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1841 {
1842   vec<slp_instance> slp_instances;
1843   slp_instance instance;
1844   basic_block bb;
1845   gimple_stmt_iterator si;
1846   unsigned i;
1847
1848   if (!bb_vinfo)
1849     return;
1850
1851   bb = BB_VINFO_BB (bb_vinfo);
1852
1853   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1854     {
1855       gimple stmt = gsi_stmt (si);
1856       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1857
1858       if (stmt_info)
1859         /* Free stmt_vec_info.  */
1860         free_stmt_vec_info (stmt);
1861     }
1862
1863   vect_destroy_datarefs (NULL, bb_vinfo);
1864   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1865   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1866   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1867   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1868     vect_free_slp_instance (instance);
1869   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1870   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1871   free (bb_vinfo);
1872   bb->aux = NULL;
1873 }
1874
1875
/* Analyze statements contained in the SLP tree node after recursively
   analyzing the subtree.  Return TRUE if the operations are supported.  */
1878
1879 static bool
1880 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1881 {
1882   bool dummy;
1883   int i;
1884   gimple stmt;
1885   slp_tree child;
1886
1887   if (!node)
1888     return true;
1889
1890   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1891     if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1892       return false;
1893
1894   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1895     {
1896       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1897       gcc_assert (stmt_info);
1898       gcc_assert (PURE_SLP_STMT (stmt_info));
1899
1900       if (!vect_analyze_stmt (stmt, &dummy, node))
1901         return false;
1902     }
1903
1904   return true;
1905 }
1906
1907
/* Analyze statements in SLP instances of the basic block.  Return TRUE if
   the operations are supported.  */
1910
1911 static bool
1912 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1913 {
1914   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1915   slp_instance instance;
1916   int i;
1917
1918   for (i = 0; slp_instances.iterate (i, &instance); )
1919     {
1920       if (!vect_slp_analyze_node_operations (bb_vinfo,
1921                                              SLP_INSTANCE_TREE (instance)))
1922         {
1923           vect_free_slp_instance (instance);
1924           slp_instances.ordered_remove (i);
1925         }
1926       else
1927         i++;
1928     }
1929
1930   if (!slp_instances.length ())
1931     return false;
1932
1933   return true;
1934 }
1935
1936
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account for defs that are marked in LIFE,
   and update LIFE according to uses of NODE.  */
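
/* As a hypothetical illustration: if the node contains "x_1 = a[0] + b[0]"
   and x_1 is also used by a stmt in BB that will not be vectorized, x_1
   must stay live in scalar form; its slot in LIFE is set and neither this
   stmt nor the defs it requires from the SLP children are accounted in the
   scalar cost.  */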
1940
1941 static unsigned
1942 vect_bb_slp_scalar_cost (basic_block bb,
1943                          slp_tree node, vec<bool, va_heap> *life)
1944 {
1945   unsigned scalar_cost = 0;
1946   unsigned i;
1947   gimple stmt;
1948   slp_tree child;
1949
1950   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1951     {
1952       unsigned stmt_cost;
1953       ssa_op_iter op_iter;
1954       def_operand_p def_p;
1955       stmt_vec_info stmt_info;
1956
1957       if ((*life)[i])
1958         continue;
1959
1960       /* If there is a non-vectorized use of the defs then the scalar
1961          stmt is kept live in which case we do not account it or any
1962          required defs in the SLP children in the scalar cost.  This
1963          way we make the vectorization more costly when compared to
1964          the scalar cost.  */
1965       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
1966         {
1967           imm_use_iterator use_iter;
1968           gimple use_stmt;
1969           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
1970             if (gimple_code (use_stmt) == GIMPLE_PHI
1971                 || gimple_bb (use_stmt) != bb
1972                 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
1973               {
1974                 (*life)[i] = true;
1975                 BREAK_FROM_IMM_USE_STMT (use_iter);
1976               }
1977         }
1978       if ((*life)[i])
1979         continue;
1980
1981       stmt_info = vinfo_for_stmt (stmt);
1982       if (STMT_VINFO_DATA_REF (stmt_info))
1983         {
1984           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1985             stmt_cost = vect_get_stmt_cost (scalar_load);
1986           else
1987             stmt_cost = vect_get_stmt_cost (scalar_store);
1988         }
1989       else
1990         stmt_cost = vect_get_stmt_cost (scalar_stmt);
1991
1992       scalar_cost += stmt_cost;
1993     }
1994
1995   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1996     scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
1997
1998   return scalar_cost;
1999 }
2000
2001 /* Check if vectorization of the basic block is profitable.  */
2002
2003 static bool
2004 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2005 {
2006   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2007   slp_instance instance;
2008   int i, j;
2009   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2010   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2011   void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2012   stmt_vec_info stmt_info = NULL;
2013   stmt_vector_for_cost body_cost_vec;
2014   stmt_info_for_cost *ci;
2015
2016   /* Calculate vector costs.  */
2017   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2018     {
2019       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2020
2021       FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2022         {
2023           stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2024           (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2025                                 stmt_info, ci->misalign, vect_body);
2026         }
2027     }
2028
2029   /* Calculate scalar cost.  */
2030   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2031     {
2032       auto_vec<bool, 20> life;
2033       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2034       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2035                                               SLP_INSTANCE_TREE (instance),
2036                                               &life);
2037     }
2038
2039   /* Complete the target-specific cost calculation.  */
2040   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2041                &vec_inside_cost, &vec_epilogue_cost);
2042
2043   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2044
2045   if (dump_enabled_p ())
2046     {
2047       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2048       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2049                    vec_inside_cost);
2050       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2051       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2052       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2053     }
2054
  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  */
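  /* Purely illustrative numbers: with VEC_INSIDE_COST == 6,
     VEC_PROLOGUE_COST == 2, VEC_EPILOGUE_COST == 0 and SCALAR_COST == 12,
     6 + 2 < 12 holds and the block is considered profitable.  */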
2057   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2058     return false;
2059
2060   return true;
2061 }
2062
2063 /* Check if the basic block can be vectorized.  */
2064
2065 static bb_vec_info
2066 vect_slp_analyze_bb_1 (basic_block bb)
2067 {
2068   bb_vec_info bb_vinfo;
2069   vec<slp_instance> slp_instances;
2070   slp_instance instance;
2071   int i;
2072   int min_vf = 2;
2073
2074   bb_vinfo = new_bb_vec_info (bb);
2075   if (!bb_vinfo)
2076     return NULL;
2077
2078   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2079     {
2080       if (dump_enabled_p ())
2081         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2082                          "not vectorized: unhandled data-ref in basic "
2083                          "block.\n");
2084
2085       destroy_bb_vec_info (bb_vinfo);
2086       return NULL;
2087     }
2088
2089   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2090     {
2091       if (dump_enabled_p ())
2092         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2093                          "not vectorized: not enough data-refs in "
2094                          "basic block.\n");
2095
2096       destroy_bb_vec_info (bb_vinfo);
2097       return NULL;
2098     }
2099
2100   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2101     {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");
2106
2107       destroy_bb_vec_info (bb_vinfo);
2108       return NULL;
2109     }
2110
2111   vect_pattern_recog (NULL, bb_vinfo);
2112
2113   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2114     {
2115       if (dump_enabled_p ())
2116         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2117                          "not vectorized: bad data alignment in basic "
2118                          "block.\n");
2119
2120       destroy_bb_vec_info (bb_vinfo);
2121       return NULL;
2122     }
2123
2124   /* Check the SLP opportunities in the basic block, analyze and build SLP
2125      trees.  */
2126   if (!vect_analyze_slp (NULL, bb_vinfo))
2127     {
2128       if (dump_enabled_p ())
2129         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2130                          "not vectorized: failed to find SLP opportunities "
2131                          "in basic block.\n");
2132
2133       destroy_bb_vec_info (bb_vinfo);
2134       return NULL;
2135     }
2136
2137   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2138
2139   /* Mark all the statements that we want to vectorize as pure SLP and
2140      relevant.  */
2141   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2142     {
2143       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2144       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2145     }
2146
2147   /* Mark all the statements that we do not want to vectorize.  */
2148   for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2149        !gsi_end_p (gsi); gsi_next (&gsi))
2150     {
2151       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2152       if (STMT_SLP_TYPE (vinfo) != pure_slp)
2153         STMT_VINFO_VECTORIZABLE (vinfo) = false;
2154     }
2155
2156   /* Analyze dependences.  At this point all stmts not participating in
2157      vectorization have to be marked.  Dependence analysis assumes
2158      that we either vectorize all SLP instances or none at all.  */
2159   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data dependence "
                         "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }
2169
2170   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2171     {
2172       if (dump_enabled_p ())
2173         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2174                          "not vectorized: unsupported alignment in basic "
2175                          "block.\n");
2176       destroy_bb_vec_info (bb_vinfo);
2177       return NULL;
2178     }
2179
2180   if (!vect_slp_analyze_operations (bb_vinfo))
2181     {
2182       if (dump_enabled_p ())
2183         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2184                          "not vectorized: bad operation in basic block.\n");
2185
2186       destroy_bb_vec_info (bb_vinfo);
2187       return NULL;
2188     }
2189
2190   /* Cost model: check if the vectorization is worthwhile.  */
2191   if (!unlimited_cost_model (NULL)
2192       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2193     {
2194       if (dump_enabled_p ())
2195         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2196                          "not vectorized: vectorization is not "
2197                          "profitable.\n");
2198
2199       destroy_bb_vec_info (bb_vinfo);
2200       return NULL;
2201     }
2202
2203   if (dump_enabled_p ())
2204     dump_printf_loc (MSG_NOTE, vect_location,
2205                      "Basic block will be vectorized using SLP\n");
2206
2207   return bb_vinfo;
2208 }
2209
2210
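/* Analyze basic block BB for SLP vectorization: check its size, then try
   vect_slp_analyze_bb_1 with each supported vector size until one succeeds.
   Return the bb_vec_info on success and NULL otherwise.  */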
2211 bb_vec_info
2212 vect_slp_analyze_bb (basic_block bb)
2213 {
2214   bb_vec_info bb_vinfo;
2215   int insns = 0;
2216   gimple_stmt_iterator gsi;
2217   unsigned int vector_sizes;
2218
2219   if (dump_enabled_p ())
2220     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2221
2222   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2223     {
2224       gimple stmt = gsi_stmt (gsi);
2225       if (!is_gimple_debug (stmt)
2226           && !gimple_nop_p (stmt)
2227           && gimple_code (stmt) != GIMPLE_LABEL)
2228         insns++;
2229     }
2230
2231   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2232     {
2233       if (dump_enabled_p ())
2234         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2235                          "not vectorized: too many instructions in "
2236                          "basic block.\n");
2237
2238       return NULL;
2239     }
2240
  /* Autodetect the first vector size we try.  */
2242   current_vector_size = 0;
2243   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2244
2245   while (1)
2246     {
2247       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2248       if (bb_vinfo)
2249         return bb_vinfo;
2250
2251       destroy_bb_vec_info (bb_vinfo);
2252
2253       vector_sizes &= ~current_vector_size;
2254       if (vector_sizes == 0
2255           || current_vector_size == 0)
2256         return NULL;
2257
2258       /* Try the next biggest vector size.  */
2259       current_vector_size = 1 << floor_log2 (vector_sizes);
2260       if (dump_enabled_p ())
2261         dump_printf_loc (MSG_NOTE, vect_location,
2262                          "***** Re-trying analysis with "
2263                          "vector size %d\n", current_vector_size);
2264     }
2265 }
2266
2267
/* SLP costs are calculated according to the SLP instance unrolling factor
   (i.e., the number of created vector stmts depends on the unrolling
   factor).  However, the actual number of vector stmts for every SLP node
   depends on VF, which is set later in vect_analyze_operations ().  Hence,
   the SLP costs should be updated.  In this function we assume that the
   inside costs calculated in vect_model_xxx_cost are linear in ncopies.  */
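
/* For instance (numbers assumed for illustration): with VF == 8 and an
   instance unrolling factor of 2, NCOPIES == 4 below, and every recorded
   body cost is added to the target cost model scaled by 4.  */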
2274
2275 void
2276 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2277 {
2278   unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2279   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2280   slp_instance instance;
2281   stmt_vector_for_cost body_cost_vec;
2282   stmt_info_for_cost *si;
2283   void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2284
2285   if (dump_enabled_p ())
2286     dump_printf_loc (MSG_NOTE, vect_location,
2287                      "=== vect_update_slp_costs_according_to_vf ===\n");
2288
2289   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2290     {
2291       /* We assume that costs are linear in ncopies.  */
2292       int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2293
2294       /* Record the instance's instructions in the target cost model.
2295          This was delayed until here because the count of instructions
2296          isn't known beforehand.  */
2297       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2298
2299       FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2300         (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2301                               vinfo_for_stmt (si->stmt), si->misalign,
2302                               vect_body);
2303     }
2304 }
2305
2306
2307 /* For constant and loop invariant defs of SLP_NODE this function returns
2308    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2309    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2310    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, or -1
   if there is none.  */
2313
2314 static void
2315 vect_get_constant_vectors (tree op, slp_tree slp_node,
2316                            vec<tree> *vec_oprnds,
2317                            unsigned int op_num, unsigned int number_of_vectors,
2318                            int reduc_index)
2319 {
2320   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2321   gimple stmt = stmts[0];
2322   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2323   unsigned nunits;
2324   tree vec_cst;
2325   tree *elts;
2326   unsigned j, number_of_places_left_in_vector;
2327   tree vector_type;
2328   tree vop;
2329   int group_size = stmts.length ();
2330   unsigned int vec_num, i;
2331   unsigned number_of_copies = 1;
2332   vec<tree> voprnds;
2333   voprnds.create (number_of_vectors);
2334   bool constant_p, is_store;
2335   tree neutral_op = NULL;
2336   enum tree_code code = gimple_expr_code (stmt);
2337   gimple def_stmt;
2338   struct loop *loop;
2339   gimple_seq ctor_seq = NULL;
2340
2341   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2342       && reduc_index != -1)
2343     {
2344       op_num = reduc_index - 1;
2345       op = gimple_op (stmt, reduc_index);
2346       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2347          we need either neutral operands or the original operands.  See
2348          get_initial_def_for_reduction() for details.  */
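      /* E.g., for a PLUS_EXPR reduction of floats the neutral operand is
         0.0, for a MULT_EXPR reduction of integers it is 1, and for
         BIT_AND_EXPR it is all-ones; filling the extra copies with these
         values leaves the reduction result unchanged.  */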
2349       switch (code)
2350         {
2351           case WIDEN_SUM_EXPR:
2352           case DOT_PROD_EXPR:
2353           case PLUS_EXPR:
2354           case MINUS_EXPR:
2355           case BIT_IOR_EXPR:
2356           case BIT_XOR_EXPR:
2357              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2358                neutral_op = build_real (TREE_TYPE (op), dconst0);
2359              else
2360                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2361
2362              break;
2363
2364           case MULT_EXPR:
2365              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2366                neutral_op = build_real (TREE_TYPE (op), dconst1);
2367              else
2368                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2369
2370              break;
2371
2372           case BIT_AND_EXPR:
2373             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2374             break;
2375
2376           case MAX_EXPR:
2377           case MIN_EXPR:
2378             def_stmt = SSA_NAME_DEF_STMT (op);
2379             loop = (gimple_bb (stmt))->loop_father;
2380             neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2381                                                 loop_preheader_edge (loop));
2382             break;
2383
2384           default:
2385             neutral_op = NULL;
2386         }
2387     }
2388
2389   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2390     {
2391       is_store = true;
2392       op = gimple_assign_rhs1 (stmt);
2393     }
2394   else
2395     is_store = false;
2396
2397   gcc_assert (op);
2398
2399   if (CONSTANT_CLASS_P (op))
2400     constant_p = true;
2401   else
2402     constant_p = false;
2403
2404   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2405   gcc_assert (vector_type);
2406   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2407
2408   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2409      created vectors. It is greater than 1 if unrolling is performed.
2410
2411      For example, we have two scalar operands, s1 and s2 (e.g., group of
2412      strided accesses of size two), while NUNITS is four (i.e., four scalars
2413      of this type can be packed in a vector).  The output vector will contain
2414      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2415      will be 2).
2416
2417      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2418      containing the operands.
2419
2420      For example, NUNITS is four as before, and the group size is 8
2421      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2422      {s5, s6, s7, s8}.  */
2423
2424   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2425
2426   number_of_places_left_in_vector = nunits;
2427   elts = XALLOCAVEC (tree, nunits);
2428   for (j = 0; j < number_of_copies; j++)
2429     {
2430       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2431         {
2432           if (is_store)
2433             op = gimple_assign_rhs1 (stmt);
2434           else
2435             {
2436               switch (code)
2437                 {
2438                   case COND_EXPR:
2439                     if (op_num == 0 || op_num == 1)
2440                       {
2441                         tree cond = gimple_assign_rhs1 (stmt);
2442                         op = TREE_OPERAND (cond, op_num);
2443                       }
2444                     else
2445                       {
2446                         if (op_num == 2)
2447                           op = gimple_assign_rhs2 (stmt);
2448                         else
2449                           op = gimple_assign_rhs3 (stmt);
2450                       }
2451                     break;
2452
2453                   case CALL_EXPR:
2454                     op = gimple_call_arg (stmt, op_num);
2455                     break;
2456
2457                   case LSHIFT_EXPR:
2458                   case RSHIFT_EXPR:
2459                   case LROTATE_EXPR:
2460                   case RROTATE_EXPR:
2461                     op = gimple_op (stmt, op_num + 1);
                    /* Unlike the other binary operators, shifts/rotates have
                       the shift count as an int rather than the same type as
                       the lhs, so make sure the scalar is the right type if
                       we are dealing with vectors of
                       long long/long/short/char.  */
2467                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2468                       op = fold_convert (TREE_TYPE (vector_type), op);
2469                     break;
2470
2471                   default:
2472                     op = gimple_op (stmt, op_num + 1);
2473                     break;
2474                 }
2475             }
2476
2477           if (reduc_index != -1)
2478             {
2479               loop = (gimple_bb (stmt))->loop_father;
2480               def_stmt = SSA_NAME_DEF_STMT (op);
2481
2482               gcc_assert (loop);
2483
2484               /* Get the def before the loop.  In reduction chain we have only
2485                  one initial value.  */
2486               if ((j != (number_of_copies - 1)
2487                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2488                        && i != 0))
2489                   && neutral_op)
2490                 op = neutral_op;
2491               else
2492                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2493                                             loop_preheader_edge (loop));
2494             }
2495
2496           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2497           number_of_places_left_in_vector--;
2498           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2499             {
2500               if (CONSTANT_CLASS_P (op))
2501                 {
2502                   op = fold_unary (VIEW_CONVERT_EXPR,
2503                                    TREE_TYPE (vector_type), op);
2504                   gcc_assert (op && CONSTANT_CLASS_P (op));
2505                 }
2506               else
2507                 {
2508                   tree new_temp
2509                     = make_ssa_name (TREE_TYPE (vector_type), NULL);
2510                   gimple init_stmt;
                  op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                               op);
2513                   init_stmt
2514                     = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2515                                                     new_temp, op, NULL_TREE);
2516                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2517                   op = new_temp;
2518                 }
2519             }
2520           elts[number_of_places_left_in_vector] = op;
2521           if (!CONSTANT_CLASS_P (op))
2522             constant_p = false;
2523
2524           if (number_of_places_left_in_vector == 0)
2525             {
2526               number_of_places_left_in_vector = nunits;
2527
2528               if (constant_p)
2529                 vec_cst = build_vector (vector_type, elts);
2530               else
2531                 {
2532                   vec<constructor_elt, va_gc> *v;
2533                   unsigned k;
2534                   vec_alloc (v, nunits);
2535                   for (k = 0; k < nunits; ++k)
2536                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2537                   vec_cst = build_constructor (vector_type, v);
2538                 }
2539               voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2540                                                     vector_type, NULL));
2541               if (ctor_seq != NULL)
2542                 {
2543                   gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2544                   gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2545                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2546                                                         GSI_SAME_STMT);
2547                   ctor_seq = NULL;
2548                 }
2549             }
2550         }
2551     }
2552
  /* Since the vectors are created in the reverse order, we should reverse
     their order.  */
2555   vec_num = voprnds.length ();
2556   for (j = vec_num; j != 0; j--)
2557     {
2558       vop = voprnds[j - 1];
2559       vec_oprnds->quick_push (vop);
2560     }
2561
2562   voprnds.release ();
2563
  /* If VF is greater than the unrolling factor needed for the SLP group of
     stmts, the NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS / NUNITS or NUNITS / NUMBER_OF_SCALARS, and hence we
     have to replicate the vectors.  */
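  /* An illustrative case: if NUMBER_OF_VECTORS == 4 but only two vectors
     were built above, a reduction with a neutral operand of 0 is padded
     with two {0, ..., 0} vectors, while otherwise the existing vectors are
     pushed again in the same order.  */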
2568   while (number_of_vectors > vec_oprnds->length ())
2569     {
2570       tree neutral_vec = NULL;
2571
2572       if (neutral_op)
2573         {
2574           if (!neutral_vec)
2575             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2576
2577           vec_oprnds->quick_push (neutral_vec);
2578         }
2579       else
2580         {
2581           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2582             vec_oprnds->quick_push (vop);
2583         }
2584     }
2585 }
2586
2587
/* Get the vectorized definitions from SLP_NODE, which contains the
   corresponding vectorized def-stmts.  */
2590
2591 static void
2592 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2593 {
2594   tree vec_oprnd;
2595   gimple vec_def_stmt;
2596   unsigned int i;
2597
2598   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2599
2600   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2601     {
2602       gcc_assert (vec_def_stmt);
2603       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2604       vec_oprnds->quick_push (vec_oprnd);
2605     }
2606 }
2607
2608
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */
2615
2616 void
2617 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2618                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2619 {
2620   gimple first_stmt;
2621   int number_of_vects = 0, i;
2622   unsigned int child_index = 0;
2623   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2624   slp_tree child = NULL;
2625   vec<tree> vec_defs;
2626   tree oprnd;
2627   bool vectorized_defs;
2628
2629   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2630   FOR_EACH_VEC_ELT (ops, i, oprnd)
2631     {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we have found the correct child.  Otherwise, we call
         vect_get_constant_vectors () and do not advance CHILD_INDEX, in
         order to check this same child node for the next operand.  */
2638       vectorized_defs = false;
2639       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2640         {
2641           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2642
2643           /* We have to check both pattern and original def, if available.  */
2644           gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2645           gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2646
2647           if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2648               || (related
2649                   && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2650             {
2651               /* The number of vector defs is determined by the number of
2652                  vector statements in the node from which we get those
2653                  statements.  */
2654               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2655               vectorized_defs = true;
2656               child_index++;
2657             }
2658         }
2659
2660       if (!vectorized_defs)
2661         {
2662           if (i == 0)
2663             {
2664               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2665               /* Number of vector stmts was calculated according to LHS in
2666                  vect_schedule_slp_instance (), fix it by replacing LHS with
2667                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2668                  details.  */
2669               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2670                                              &rhs_size_unit);
2671               if (rhs_size_unit != lhs_size_unit)
2672                 {
2673                   number_of_vects *= rhs_size_unit;
2674                   number_of_vects /= lhs_size_unit;
2675                 }
2676             }
2677         }
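
      /* Hypothetical illustration: if FIRST_STMT converts int (4 bytes) to
         short (2 bytes), then RHS_SIZE_UNIT == 4 and LHS_SIZE_UNIT == 2,
         and NUMBER_OF_VECTS is doubled above: twice as many input vectors
         of the wider type are consumed per vector of narrow results.  */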
2678
2679       /* Allocate memory for vectorized defs.  */
2680       vec_defs = vNULL;
2681       vec_defs.create (number_of_vects);
2682
2683       /* For reduction defs we call vect_get_constant_vectors (), since we are
2684          looking for initial loop invariant values.  */
2685       if (vectorized_defs && reduc_index == -1)
2686         /* The defs are already vectorized.  */
2687         vect_get_slp_vect_defs (child, &vec_defs);
2688       else
2689         /* Build vectors from scalar defs.  */
2690         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2691                                    number_of_vects, reduc_index);
2692
2693       vec_oprnds->quick_push (vec_defs);
2694
2695       /* For reductions, we only need initial values.  */
2696       if (reduc_index != -1)
2697         return;
2698     }
2699 }
2700
2701
/* Create NCOPIES permutation statements using the mask MASK and two input
   vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the
   first copy, shifting by STRIDE elements of DR_CHAIN for every subsequent
   copy (STRIDE is the number of vectorized stmts for NODE divided by the
   number of copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   where the created stmts must be inserted.  */
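
/* A sketch with assumed numbers: if NODE has 4 vectorized stmts and
   NCOPIES == 2, then STRIDE == 2; the first copy permutes
   DR_CHAIN[FIRST_VEC_INDX] and DR_CHAIN[SECOND_VEC_INDX], and the second
   copy uses the entries two slots further along DR_CHAIN.  */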
2710
2711 static inline void
2712 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2713                            tree mask, int first_vec_indx, int second_vec_indx,
2714                            gimple_stmt_iterator *gsi, slp_tree node,
2715                            tree vectype, vec<tree> dr_chain,
2716                            int ncopies, int vect_stmts_counter)
2717 {
2718   tree perm_dest;
2719   gimple perm_stmt = NULL;
2720   stmt_vec_info next_stmt_info;
2721   int i, stride;
2722   tree first_vec, second_vec, data_ref;
2723
2724   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2725
2726   /* Initialize the vect stmts of NODE to properly insert the generated
2727      stmts later.  */
2728   for (i = SLP_TREE_VEC_STMTS (node).length ();
2729        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2730     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2731
2732   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2733   for (i = 0; i < ncopies; i++)
2734     {
2735       first_vec = dr_chain[first_vec_indx];
2736       second_vec = dr_chain[second_vec_indx];
2737
2738       /* Generate the permute statement.  */
2739       perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2740                                                 first_vec, second_vec, mask);
2741       data_ref = make_ssa_name (perm_dest, perm_stmt);
2742       gimple_set_lhs (perm_stmt, data_ref);
2743       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2744
2745       /* Store the vector statement in NODE.  */
2746       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2747
2748       first_vec_indx += stride;
2749       second_vec_indx += stride;
2750     }
2751
2752   /* Mark the scalar stmt as vectorized.  */
2753   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2754   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2755 }
2756
2757
/* Given FIRST_MASK_ELEMENT, the mask element in element representation,
   return in *CURRENT_MASK_ELEMENT its equivalent in the target-specific
   representation.  Check that the mask is valid and return FALSE if not.
   Set *NEED_NEXT_VECTOR to TRUE if the permutation requires moving on to
   the next vector, i.e., the current first vector is not needed.  */
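
/* An example of a rejected mask (values assumed for illustration): with
   MASK_NUNITS == 4 and a single input vector, a mask element of 5 would
   refer into a second input vector that does not exist, so the checks
   below fail with "permutation requires at least two vectors".  */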
2763
2764 static bool
2765 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2766                        int mask_nunits, bool only_one_vec, int index,
2767                        unsigned char *mask, int *current_mask_element,
2768                        bool *need_next_vector, int *number_of_mask_fixes,
2769                        bool *mask_fixed, bool *needs_first_vector)
2770 {
2771   int i;
2772
2773   /* Convert to target specific representation.  */
2774   *current_mask_element = first_mask_element + m;
2775   /* Adjust the value in case it's a mask for second and third vectors.  */
2776   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2777
2778   if (*current_mask_element < mask_nunits)
2779     *needs_first_vector = true;
2780
2781   /* We have only one input vector to permute but the mask accesses values in
2782      the next vector as well.  */
2783   if (only_one_vec && *current_mask_element >= mask_nunits)
2784     {
2785       if (dump_enabled_p ())
2786         {
2787           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2788                            "permutation requires at least two vectors ");
2789           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2790           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2791         }
2792
2793       return false;
2794     }
2795
2796   /* The mask requires the next vector.  */
2797   if (*current_mask_element >= mask_nunits * 2)
2798     {
2799       if (*needs_first_vector || *mask_fixed)
2800         {
2801           /* We either need the first vector too or have already moved to the
2802              next vector. In both cases, this permutation needs three
2803              vectors.  */
2804           if (dump_enabled_p ())
2805             {
2806               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2807                                "permutation requires at "
2808                                "least three vectors ");
2809               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2810               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2811             }
2812
2813           return false;
2814         }
2815
2816       /* We move to the next vector, dropping the first one and working with
2817          the second and the third - we need to adjust the values of the mask
2818          accordingly.  */
2819       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2820
2821       for (i = 0; i < index; i++)
2822         mask[i] -= mask_nunits * *number_of_mask_fixes;
2823
2824       (*number_of_mask_fixes)++;
2825       *mask_fixed = true;
2826     }
2827
2828   *need_next_vector = *mask_fixed;
2829
2830   /* This was the last element of this mask. Start a new one.  */
2831   if (index == mask_nunits - 1)
2832     {
2833       *number_of_mask_fixes = 1;
2834       *mask_fixed = false;
2835       *needs_first_vector = false;
2836     }
2837
2838   return true;
2839 }
2840
2841
2842 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2843    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2844    permute statements for the SLP node NODE of the SLP instance
2845    SLP_NODE_INSTANCE.  */
2846
2847 bool
2848 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
2849                               gimple_stmt_iterator *gsi, int vf,
2850                               slp_instance slp_node_instance, bool analyze_only)
2851 {
2852   gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2853   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2854   tree mask_element_type = NULL_TREE, mask_type;
2855   int i, j, k, nunits, vec_index = 0, scalar_index;
2856   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2857   gimple next_scalar_stmt;
2858   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2859   int first_mask_element;
2860   int index, unroll_factor, current_mask_element, ncopies;
2861   unsigned char *mask;
2862   bool only_one_vec = false, need_next_vector = false;
2863   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2864   int number_of_mask_fixes = 1;
2865   bool mask_fixed = false;
2866   bool needs_first_vector = false;
2867   enum machine_mode mode;
2868
2869   mode = TYPE_MODE (vectype);
2870
2871   if (!can_vec_perm_p (mode, false, NULL))
2872     {
2873       if (dump_enabled_p ())
2874         {
2875           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2876                            "no vect permute for ");
2877           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2878           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2879         }
2880       return false;
2881     }
2882
2883   /* The generic VEC_PERM_EXPR code always uses an integral type of the
2884      same size as the vector element being permuted.  */
2885   mask_element_type = lang_hooks.types.type_for_mode
2886                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2887   mask_type = get_vectype_for_scalar_type (mask_element_type);
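  /* E.g. on a typical target (stated as an assumption): for a V4SF VECTYPE
     the SFmode element maps to SImode, so MASK_ELEMENT_TYPE is a 32-bit
     integer type and MASK_TYPE is V4SI.  */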
2888   nunits = TYPE_VECTOR_SUBPARTS (vectype);
2889   mask = XALLOCAVEC (unsigned char, nunits);
2890   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2891
  /* The number of vector stmts to generate based only on the
     SLP_NODE_INSTANCE unrolling factor.  */
2894   orig_vec_stmts_num = group_size *
2895                 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2896   if (orig_vec_stmts_num == 1)
2897     only_one_vec = true;
2898
  /* The number of copies is determined by the final vectorization factor
     relative to the SLP_NODE_INSTANCE unrolling factor.  */
2901   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
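  /* Using the numbers of the example below (GROUP_SIZE == 3, NUNITS == 4,
     instance unrolling factor 4): ORIG_VEC_STMTS_NUM == 3 * 4 / 4 == 3,
     and when the final VF equals the instance unrolling factor,
     NCOPIES == 1.  */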
2902
2903   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2904     return false;
2905
2906   /* Generate permutation masks for every NODE. Number of masks for each NODE
2907      is equal to GROUP_SIZE.
2908      E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
2911      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2912      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2913      ...
2914
2915      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2916      The last mask is illegal since we assume two operands for permute
2917      operation, and the mask element values can't be outside that range.
2918      Hence, the last mask must be converted into {2,5,5,5}.
2919      For the first two permutations we need the first and the second input
2920      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2921      we need the second and the third vectors: {b1,c1,a2,b2} and
2922      {c2,a3,b3,c3}.  */
2923
2924     {
2925       scalar_index = 0;
2926       index = 0;
2927       vect_stmts_counter = 0;
2928       vec_index = 0;
2929       first_vec_index = vec_index++;
2930       if (only_one_vec)
2931         second_vec_index = first_vec_index;
2932       else
        second_vec_index = vec_index++;
2934
2935       for (j = 0; j < unroll_factor; j++)
2936         {
2937           for (k = 0; k < group_size; k++)
2938             {
2939               i = SLP_TREE_LOAD_PERMUTATION (node)[k];
2940               first_mask_element = i + j * group_size;
2941               if (!vect_get_mask_element (stmt, first_mask_element, 0,
2942                                           nunits, only_one_vec, index,
2943                                           mask, &current_mask_element,
2944                                           &need_next_vector,
2945                                           &number_of_mask_fixes, &mask_fixed,
2946                                           &needs_first_vector))
2947                 return false;
2948               mask[index++] = current_mask_element;
2949
2950               if (index == nunits)
2951                 {
2952                   index = 0;
2953                   if (!can_vec_perm_p (mode, false, mask))
2954                     {
2955                       if (dump_enabled_p ())
2956                         {
2957                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2958                                            vect_location,
2959                                            "unsupported vect permute { ");
2960                           for (i = 0; i < nunits; ++i)
2961                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2962                                          mask[i]);
2963                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2964                         }
2965                       return false;
2966                     }
2967
2968                   if (!analyze_only)
2969                     {
2970                       int l;
2971                       tree mask_vec, *mask_elts;
2972                       mask_elts = XALLOCAVEC (tree, nunits);
2973                       for (l = 0; l < nunits; ++l)
2974                         mask_elts[l] = build_int_cst (mask_element_type,
2975                                                       mask[l]);
2976                       mask_vec = build_vector (mask_type, mask_elts);
2977
2978                       if (need_next_vector)
2979                         {
2980                           first_vec_index = second_vec_index;
2981                           second_vec_index = vec_index;
2982                         }
2983
2984                       next_scalar_stmt
2985                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2986
2987                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
2988                                mask_vec, first_vec_index, second_vec_index,
2989                                gsi, node, vectype, dr_chain,
2990                                ncopies, vect_stmts_counter++);
2991                     }
2992                 }
2993             }
2994         }
2995     }
2996
2997   return true;
2998 }
2999
3000
3001
3002 /* Vectorize SLP instance tree in postorder.  */
3003
3004 static bool
3005 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3006                             unsigned int vectorization_factor)
3007 {
3008   gimple stmt;
3009   bool grouped_store, is_store;
3010   gimple_stmt_iterator si;
3011   stmt_vec_info stmt_info;
3012   unsigned int vec_stmts_size, nunits, group_size;
3013   tree vectype;
3014   int i;
3015   slp_tree child;
3016
3017   if (!node)
3018     return false;
3019
3020   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3021     vect_schedule_slp_instance (child, instance, vectorization_factor);
3022
3023   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3024   stmt_info = vinfo_for_stmt (stmt);
3025
3026   /* VECTYPE is the type of the destination.  */
3027   vectype = STMT_VINFO_VECTYPE (stmt_info);
3028   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3029   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3030
3031   /* For each SLP instance calculate the number of vector stmts to be
3032      created for the scalar stmts in each node of the SLP tree.  The
3033      scalars covered are GROUP_SIZE elements per scalar iteration times
3034      VF iterations, so dividing that by the vector size (NUNITS) gives
3035      the number of vector stmts.  */
3036   vec_stmts_size = (vectorization_factor * group_size) / nunits;
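  /* For instance (hypothetical numbers): with VECTORIZATION_FACTOR == 8,
     GROUP_SIZE == 2 and NUNITS == 4, VEC_STMTS_SIZE is 8 * 2 / 4 == 4 vector
     stmts for the node.  */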
3037
3038   if (!SLP_TREE_VEC_STMTS (node).exists ())
3039     {
3040       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3041       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3042     }
3043
3044   if (dump_enabled_p ())
3045     {
3046       dump_printf_loc (MSG_NOTE, vect_location,
3047                        "------>vectorizing SLP node starting from: ");
3048       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3049       dump_printf (MSG_NOTE, "\n");
3050     }
3051
3052   /* Loads should be inserted before the first load.  */
3053   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3054       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3055       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3056       && SLP_TREE_LOAD_PERMUTATION (node).exists ())
3057     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3058   else if (is_pattern_stmt_p (stmt_info))
3059     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3060   else
3061     si = gsi_for_stmt (stmt);
3062
3063   /* Stores should be inserted just before the last store.  */
3064   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3065       && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3066     {
3067       gimple last_store = vect_find_last_store_in_slp_instance (instance);
3068       if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3069         last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3070       si = gsi_for_stmt (last_store);
3071     }
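
  /* E.g. (illustration only): for a group of scalar stores a[0] = x_1;
     a[1] = y_2; the vector store is emitted just before a[1] = y_2, the
     last scalar store of the group, where all the scalar operands are
     already available.  */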
3072
3073   /* Mark the first element of the reduction chain as a reduction to
3074      properly transform the node.  In the analysis phase only the last
3075      element of the chain is marked as a reduction.  */
3076   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3077       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3078     {
3079       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3080       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3081     }
3082
3083   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3084   return is_store;
3085 }
3086
3087 /* Replace each scalar call in SLP node NODE by setting its lhs to zero.
3088    For loop vectorization this is done in vectorizable_call, but for SLP
3089    it needs to be deferred until end of vect_schedule_slp, because multiple
3090    SLP instances may refer to the same scalar stmt.  */
3091
3092 static void
3093 vect_remove_slp_scalar_calls (slp_tree node)
3094 {
3095   gimple stmt, new_stmt;
3096   gimple_stmt_iterator gsi;
3097   int i;
3098   slp_tree child;
3099   tree lhs;
3100   stmt_vec_info stmt_info;
3101
3102   if (!node)
3103     return;
3104
3105   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3106     vect_remove_slp_scalar_calls (child);
3107
3108   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3109     {
3110       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3111         continue;
3112       stmt_info = vinfo_for_stmt (stmt);
3113       if (stmt_info == NULL
3114           || is_pattern_stmt_p (stmt_info)
3115           || !PURE_SLP_STMT (stmt_info))
3116         continue;
3117       lhs = gimple_call_lhs (stmt);
3118       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3119       set_vinfo_for_stmt (new_stmt, stmt_info);
3120       set_vinfo_for_stmt (stmt, NULL);
3121       STMT_VINFO_STMT (stmt_info) = new_stmt;
3122       gsi = gsi_for_stmt (stmt);
3123       gsi_replace (&gsi, new_stmt, false);
3124       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3125     }
3126 }
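
/* Illustration for vect_remove_slp_scalar_calls above (hypothetical GIMPLE;
   foo is a made-up callee): a call stmt such as

     x_1 = foo (a_2);

   becomes

     x_1 = 0;

   so that x_1 keeps a definition while the call itself disappears; the dead
   assignment can then be cleaned up by DCE once no SLP instance refers to
   the stmt any more.  */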
3127
3128 /* Generate vector code for all SLP instances in the loop/basic block.  */
3129
3130 bool
3131 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3132 {
3133   vec<slp_instance> slp_instances;
3134   slp_instance instance;
3135   unsigned int i, vf;
3136   bool is_store = false;
3137
3138   if (loop_vinfo)
3139     {
3140       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3141       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3142     }
3143   else
3144     {
3145       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3146       vf = 1;
3147     }
3148
3149   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3150     {
3151       /* Schedule the tree of INSTANCE.  */
3152       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3153                                              instance, vf);
3154       if (dump_enabled_p ())
3155         dump_printf_loc (MSG_NOTE, vect_location,
3156                          "vectorizing stmts using SLP.\n");
3157     }
3158
3159   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3160     {
3161       slp_tree root = SLP_INSTANCE_TREE (instance);
3162       gimple store;
3163       unsigned int j;
3164       gimple_stmt_iterator gsi;
3165
3166       /* Remove scalar call stmts.  Do not do this for basic-block
3167          vectorization as not all uses may be vectorized.
3168          ???  Why should this be necessary?  DCE should be able to
3169          remove the stmts itself.
3170          ???  For BB vectorization we can as well remove scalar
3171          stmts starting from the SLP tree root if they have no
3172          uses.  */
3173       if (loop_vinfo)
3174         vect_remove_slp_scalar_calls (root);
3175
3176       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3177                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3178         {
3179           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3180             break;
3181
3182           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3183             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3184           /* Free the attached stmt_vec_info and remove the stmt.  */
3185           gsi = gsi_for_stmt (store);
3186           unlink_stmt_vdef (store);
3187           gsi_remove (&gsi, true);
3188           release_defs (store);
3189           free_stmt_vec_info (store);
3190         }
3191     }
3192
3193   return is_store;
3194 }
3195
3196
3197 /* Vectorize the basic block.  */
3198
3199 void
3200 vect_slp_transform_bb (basic_block bb)
3201 {
3202   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3203   gimple_stmt_iterator si;
3204
3205   gcc_assert (bb_vinfo);
3206
3207   if (dump_enabled_p ())
3208     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3209
3210   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3211     {
3212       gimple stmt = gsi_stmt (si);
3213       stmt_vec_info stmt_info;
3214
3215       if (dump_enabled_p ())
3216         {
3217           dump_printf_loc (MSG_NOTE, vect_location,
3218                            "------>SLPing statement: ");
3219           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3220           dump_printf (MSG_NOTE, "\n");
3221         }
3222
3223       stmt_info = vinfo_for_stmt (stmt);
3224       gcc_assert (stmt_info);
3225
3226       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3227       if (STMT_SLP_TYPE (stmt_info))
3228         {
3229           vect_schedule_slp (NULL, bb_vinfo);
3230           break;
3231         }
3232     }
3233
3234   if (dump_enabled_p ())
3235     dump_printf_loc (MSG_NOTE, vect_location,
3236                      "BASIC BLOCK VECTORIZED\n");
3237
3238   destroy_bb_vec_info (bb_vinfo);
3239 }