tree-vect-slp.c (vect_get_and_check_slp_defs): Remove broken code swapping operands.
gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the location of the first statement that has one, and
   UNKNOWN_LOC if no such statement is found or BB is NULL.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
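      /* The condition of a COND_EXPR is an embedded comparison whose two
         arguments are collected as separate operands, so one more
         operand slot is needed.  */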
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
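      /* For a GIMPLE_CALL the arguments start at operand index 3
         (operand 0 is the LHS, 1 the called function, 2 the static
         chain).  */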
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

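      /* A COND_EXPR condition appears here as a whole comparison; split
         it so that its first argument is handled now and its second one
         on the next iteration.  */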
      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
               || (!def
                   && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                           TREE_TYPE (oprnd))))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types ");

              return false;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if the def-stmts are not isomorphic, require a
   data permutation, or use unsupported types of operation.  Otherwise,
   return TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor)
{
  unsigned int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  vec<slp_oprnd_info> oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.");
                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
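                      /* The shift amount stays a scalar here, so the
                         whole SLP node must use one common value.  */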
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
                                                stmt, (i == 0), &oprnds_info))
                {
                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
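              /* The smallest number of copies of the group that together
                 fill a whole number of vectors of MAX_NUNITS elements.  */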
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                }

              /* We stop growing the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Non-grouped loads are not supported.  */
              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, stmt,
                                            (i == 0), &oprnds_info))
            {
              vect_free_oprnd_info (oprnds_info);
              return false;
            }
        }
    }

  /* Grouped loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      loads->safe_push (*node);
      vect_free_oprnd_info (oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   max_nunits, loads,
                                   vectorization_factor))
        {
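          /* CHILD took ownership of OPRND_INFO->DEF_STMTS when it was
             created; clear our reference so that vect_free_oprnd_info
             does not release the vector a second time.  */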
          if (child)
            oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

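      /* On success the child owns DEF_STMTS as well; start a fresh
         vector for this operand.  */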
      oprnd_info->def_stmts.create (0);
      SLP_TREE_CHILDREN (*node).quick_push (child);
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  vec<slp_tree> sorted_loads = vNULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported load permutation is the one where all the
     loads in a node access the same location, and the data-refs in the
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
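  /* LOAD_PERMUTATION holds GROUP_SIZE indices for every load node; the
     first index of each block gives the node's position in the
     interleaving chain.  */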
  for (i = 0, j = 0;
       SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
       && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
       i += group_size, j++)
    {
      gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data "
                               "permutation ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                scalar_stmt, 0);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads.create (group_size);
  for (i = 0; i < group_size; i++)
    sorted_loads.safe_push (tmp_loads[i]);

  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<int> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

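  /* Scatter the statements: the stmt at index I moves to position
     PERMUTATION[I].  */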
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of grouped stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   vec<int> load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (load_permutation, i, next)
        dump_printf (MSG_NOTE, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
        return false;

      stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as follows:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if the
     interleaving chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
                      other_node_first =
                                SLP_TREE_SCALAR_STMTS (other_complex_node)[0];

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is OK, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2
      && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
    {
      SLP_INSTANCE_LOADS (slp_instn).release ();
      SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance; not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = load_permutation.length () / group_size;

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = load_permutation[j];
              first_group_load_index = load_permutation[k];

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          bitmap_clear (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = load_permutation[k];
              if (bitmap_bit_p (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              bitmap_set_bit (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!bitmap_bit_p (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                         != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = SLP_TREE_SCALAR_STMTS (node)[0];
              if (first_load
                    != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                       == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "unsupported unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            first_load, 0);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.., i.e., GROUP_SIZE
     runs in which each run repeats a single load index GROUP_SIZE times,
     so LOAD_PERMUTATION has GROUP_SIZE * GROUP_SIZE entries (unless it's
     a reduction).  */
  if (load_permutation.length ()
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           load_permutation.iterate (i, &next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (bitmap_bit_p (load_index, prev))
        {
          supported = false;
          break;
        }

      bitmap_set_bit (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!bitmap_bit_p (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                         slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                             instance, child, prologue_cost_vec,
                             ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                               vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          int i;
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          vect_model_load_cost (stmt_info, ncopies_for_cost, false,
                                node, prologue_cost_vec, body_cost_vec);
          /* If the load is permuted record the cost for the permutation.
             ???  Loads from multiple chains are let through here only
             for a single special case involving complex numbers where
             in the end no permutation is necessary.  */
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
            if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
                 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
                && vect_get_place_in_interleaving_chain
                     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
              {
                record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                  stmt_info, 0, vect_body);
                break;
              }
        }
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ???  This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
                              &def_stmt, &def, &dt)
          && (dt == vect_constant_def || dt == vect_external_def))
        record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                          stmt_info, 0, vect_prologue);
    }
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
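  /* E.g. a group of 6 stmts with 4 units per vector needs
     lcm (4, 6) / 4 = 3 vector stmts.  */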

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
                           instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
                : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
1540       gcc_assert (loop_vinfo);
1541       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1542       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1543     }
1544
1545   if (!vectype)
1546     {
1547       if (dump_enabled_p ())
1548         {
1549           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1550                            "Build SLP failed: unsupported data-type ");
1551           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1552         }
1553
1554       return false;
1555     }
1556
1557   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1558   if (loop_vinfo)
1559     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1560   else
1561     vectorization_factor = nunits;
1562
1563   /* Calculate the unrolling factor.  */
1564   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
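  /* E.g., NUNITS == 4 and GROUP_SIZE == 2 give an unrolling factor of 2:
     two copies of the group fill one vector.  */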
1565   if (unrolling_factor != 1 && !loop_vinfo)
1566     {
1567       if (dump_enabled_p ())
1568         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1569                          "Build SLP failed: unrolling required in basic"
1570                          " block SLP");
1571
1572       return false;
1573     }
1574
1575   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1576   scalar_stmts.create (group_size);
1577   next = stmt;
1578   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1579     {
1580       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1581       while (next)
1582         {
1583           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1584               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1585             scalar_stmts.safe_push (
1586                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1587           else
1588             scalar_stmts.safe_push (next);
1589           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1590         }
1591     }
1592   else
1593     {
1594       /* Collect reduction statements.  */
1595       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1596       for (i = 0; reductions.iterate (i, &next); i++)
1597         scalar_stmts.safe_push (next);
1598     }
1599
1600   node = vect_create_new_slp_node (scalar_stmts);
1601
1602   loads.create (group_size);
1603
1604   /* Build the tree for the SLP instance.  */
1605   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1606                            &max_nunits, &loads,
1607                            vectorization_factor))
1608     {
1609       /* Calculate the unrolling factor based on the smallest type.  */
1610       if (max_nunits > nunits)
1611         unrolling_factor = least_common_multiple (max_nunits, group_size)
1612                            / group_size;
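      /* E.g., a narrower element type giving MAX_NUNITS == 8 with
         GROUP_SIZE == 2 raises the unrolling factor to 4.  */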
1613
1614       if (unrolling_factor != 1 && !loop_vinfo)
1615         {
1616           if (dump_enabled_p ())
1617             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1618                              "Build SLP failed: unrolling required in basic"
1619                              " block SLP");
1620           vect_free_slp_tree (node);
1621           loads.release ();
1622           return false;
1623         }
1624
1625       /* Create a new SLP instance.  */
1626       new_instance = XNEW (struct _slp_instance);
1627       SLP_INSTANCE_TREE (new_instance) = node;
1628       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1629       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1630       SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
1631       SLP_INSTANCE_LOADS (new_instance) = loads;
1632       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1633       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = vNULL;
1634
1635       /* Compute the load permutation.  */
1636       slp_tree load_node;
1637       bool loads_permuted = false;
1638       vec<int> load_permutation;
1639       load_permutation.create (group_size * group_size);
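      /* One entry is recorded per scalar load: its position in the
         interleaving chain of its first element.  E.g., a load node whose
         stmts appear in the order {a[1], a[0]} records {1, 0}.  */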
1640       FOR_EACH_VEC_ELT (loads, i, load_node)
1641         {
1642           int j;
1643           gimple load;
1644           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1645             {
1646               int load_place;
1647               load_place = vect_get_place_in_interleaving_chain
1648                 (load, GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)));
1649               if (load_place != j
1650                   /* ???  We allow loads from different groups to
1651                      get to here for a special case handled in
1652                      the permutation code.  Make sure we get to that.  */
1653                   || (GROUP_FIRST_ELEMENT
1654                         (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]))
1655                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load))))
1656                 loads_permuted = true;
1657               load_permutation.safe_push (load_place);
1658             }
1659         }
1660
1661       if (loads_permuted)
1662         {
1663           SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1664           if (!vect_supported_load_permutation_p (new_instance, group_size,
1665                                                   load_permutation))
1666             {
1667               if (dump_enabled_p ())
1668                 {
1669                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1670                                    "Build SLP failed: unsupported load "
1671                                    "permutation ");
1672                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1673                 }
1674
1675               vect_free_slp_instance (new_instance);
1676               return false;
1677             }
1678
1679           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1680              = vect_find_first_load_in_slp_instance (new_instance);
1681         }
1682       else
1683         load_permutation.release ();
1684
1685       /* Compute the costs of this SLP instance.  */
1686       vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1687                              new_instance, TYPE_VECTOR_SUBPARTS (vectype));
1688
1689       if (loop_vinfo)
1690         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1691       else
1692         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1693
1694       if (dump_enabled_p ())
1695         vect_print_slp_tree (MSG_NOTE, node);
1696
1697       return true;
1698     }
1699
1700   /* Failed to SLP.  */
1701   /* Free the allocated memory.  */
1702   vect_free_slp_tree (node);
1703   loads.release ();
1704
1705   return false;
1706 }
1707
1708
1709 /* Check if there are stmts in the loop that can be vectorized using SLP.
1710    Build SLP trees of packed scalar stmts if SLP is possible.  */
1711
1712 bool
1713 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1714 {
1715   unsigned int i;
1716   vec<gimple> grouped_stores;
1717   vec<gimple> reductions = vNULL;
1718   vec<gimple> reduc_chains = vNULL;
1719   gimple first_element;
1720   bool ok = false;
1721
1722   if (dump_enabled_p ())
1723     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
1724
1725   if (loop_vinfo)
1726     {
1727       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1728       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1729       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1730     }
1731   else
1732     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1733
1734   /* Find SLP sequences starting from groups of grouped stores.  */
1735   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1736     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1737       ok = true;
1738
1739   if (bb_vinfo && !ok)
1740     {
1741       if (dump_enabled_p ())
1742         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1743                          "Failed to SLP the basic block.");
1744
1745       return false;
1746     }
1747
1748   if (loop_vinfo
1749       && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1750     {
1751       /* Find SLP sequences starting from reduction chains.  */
1752       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1753         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1754           ok = true;
1755         else
1756           return false;
1757
1758       /* Don't try to vectorize SLP reductions if a reduction chain was
1759          detected.  */
1760       return ok;
1761     }
1762
1763   /* Find SLP sequences starting from groups of reductions.  */
1764   if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1765       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1766     ok = true;
1767
1768   return true;
1769 }
1770
1771
1772 /* For each possible SLP instance decide whether to SLP it and calculate the
1773    overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
1774    to SLP at least one instance.  */
1775
1776 bool
1777 vect_make_slp_decision (loop_vec_info loop_vinfo)
1778 {
1779   unsigned int i, unrolling_factor = 1;
1780   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1781   slp_instance instance;
1782   int decided_to_slp = 0;
1783
1784   if (dump_enabled_p ())
1785     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1786
1787   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1788     {
1789       /* FORNOW: SLP if you can.  */
1790       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1791         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1792
1793       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1794          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1795          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1796       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1797       decided_to_slp++;
1798     }
1799
1800   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1801
1802   if (decided_to_slp && dump_enabled_p ())
1803     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1804                      "Decided to SLP %d instances. Unrolling factor %d",
1805                      decided_to_slp, unrolling_factor);
1806
1807   return (decided_to_slp > 0);
1808 }
1809
1810
1811 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1812    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
1813
1814 static void
1815 vect_detect_hybrid_slp_stmts (slp_tree node)
1816 {
1817   int i;
1818   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1819   gimple stmt = stmts[0];
1820   imm_use_iterator imm_iter;
1821   gimple use_stmt;
1822   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1823   slp_tree child;
1824   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1825   struct loop *loop = NULL;
1826   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1827   basic_block bb = NULL;
1828
1829   if (!node)
1830     return;
1831
1832   if (loop_vinfo)
1833     loop = LOOP_VINFO_LOOP (loop_vinfo);
1834   else
1835     bb = BB_VINFO_BB (bb_vinfo);
1836
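  /* A pure-SLP stmt becomes hybrid when its def is used, inside the region
     being vectorized, by a relevant stmt that is not part of any SLP tree
     (excluding reduction PHIs), since that use must then be handled by the
     loop-based vectorization path as well.  */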
1837   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1838     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1839         && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1840       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1841         if (gimple_bb (use_stmt)
1842             && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1843                  || bb == gimple_bb (use_stmt))
1844             && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1845             && !STMT_SLP_TYPE (stmt_vinfo)
1846             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1847                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1848             && !(gimple_code (use_stmt) == GIMPLE_PHI
1849                  && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1850                   == vect_reduction_def))
1851           vect_mark_slp_stmts (node, hybrid, i);
1852
1853   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1854     vect_detect_hybrid_slp_stmts (child);
1855 }
1856
1857
1858 /* Find stmts that must be both vectorized and SLPed.  */
1859
1860 void
1861 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1862 {
1863   unsigned int i;
1864   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1865   slp_instance instance;
1866
1867   if (dump_enabled_p ())
1868     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1869
1870   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1871     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1872 }
1873
1874
1875 /* Create and initialize a new bb_vec_info struct for BB, as well as
1876    stmt_vec_info structs for all the stmts in it.  */
1877
1878 static bb_vec_info
1879 new_bb_vec_info (basic_block bb)
1880 {
1881   bb_vec_info res = NULL;
1882   gimple_stmt_iterator gsi;
1883
1884   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1885   BB_VINFO_BB (res) = bb;
1886
1887   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1888     {
1889       gimple stmt = gsi_stmt (gsi);
1890       gimple_set_uid (stmt, 0);
1891       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1892     }
1893
1894   BB_VINFO_GROUPED_STORES (res).create (10);
1895   BB_VINFO_SLP_INSTANCES (res).create (2);
1896   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1897
1898   bb->aux = res;
1899   return res;
1900 }
1901
1902
1903 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1904    stmts in the basic block.  */
1905
1906 static void
1907 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1908 {
1909   vec<slp_instance> slp_instances;
1910   slp_instance instance;
1911   basic_block bb;
1912   gimple_stmt_iterator si;
1913   unsigned i;
1914
1915   if (!bb_vinfo)
1916     return;
1917
1918   bb = BB_VINFO_BB (bb_vinfo);
1919
1920   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1921     {
1922       gimple stmt = gsi_stmt (si);
1923       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1924
1925       if (stmt_info)
1926         /* Free stmt_vec_info.  */
1927         free_stmt_vec_info (stmt);
1928     }
1929
1930   free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1931   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1932   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1933   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1934   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1935     vect_free_slp_instance (instance);
1936   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1937   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1938   free (bb_vinfo);
1939   bb->aux = NULL;
1940 }
1941
1942
1943 /* Analyze the statements contained in SLP tree NODE after recursively
1944    analyzing its subtree.  Return TRUE if the operations are supported.  */
1945
1946 static bool
1947 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1948 {
1949   bool dummy;
1950   int i;
1951   gimple stmt;
1952   slp_tree child;
1953
1954   if (!node)
1955     return true;
1956
1957   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1958     if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1959       return false;
1960
1961   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1962     {
1963       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1964       gcc_assert (stmt_info);
1965       gcc_assert (PURE_SLP_STMT (stmt_info));
1966
1967       if (!vect_analyze_stmt (stmt, &dummy, node))
1968         return false;
1969     }
1970
1971   return true;
1972 }
1973
1974
1975 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
1976    operations are supported.  */
1977
1978 static bool
1979 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1980 {
1981   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1982   slp_instance instance;
1983   int i;
1984
1985   for (i = 0; slp_instances.iterate (i, &instance); )
1986     {
1987       if (!vect_slp_analyze_node_operations (bb_vinfo,
1988                                              SLP_INSTANCE_TREE (instance)))
1989         {
1990           vect_free_slp_instance (instance);
1991           slp_instances.ordered_remove (i);
1992         }
1993       else
1994         i++;
1995     }
1996
1997   if (!slp_instances.length ())
1998     return false;
1999
2000   return true;
2001 }
2002
2003 /* Check if vectorization of the basic block is profitable.  */
2004
2005 static bool
2006 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2007 {
2008   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2009   slp_instance instance;
2010   int i, j;
2011   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2012   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2013   unsigned int stmt_cost;
2014   gimple stmt;
2015   gimple_stmt_iterator si;
2016   basic_block bb = BB_VINFO_BB (bb_vinfo);
2017   void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2018   stmt_vec_info stmt_info = NULL;
2019   stmt_vector_for_cost body_cost_vec;
2020   stmt_info_for_cost *ci;
2021
2022   /* Calculate vector costs.  */
2023   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2024     {
2025       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2026
2027       FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2028         {
2029           stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2030           (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2031                                 stmt_info, ci->misalign, vect_body);
2032         }
2033     }
2034
2035   /* Calculate scalar cost.  */
2036   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2037     {
2038       stmt = gsi_stmt (si);
2039       stmt_info = vinfo_for_stmt (stmt);
2040
2041       if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2042           || !PURE_SLP_STMT (stmt_info))
2043         continue;
2044
2045       if (STMT_VINFO_DATA_REF (stmt_info))
2046         {
2047           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2048             stmt_cost = vect_get_stmt_cost (scalar_load);
2049           else
2050             stmt_cost = vect_get_stmt_cost (scalar_store);
2051         }
2052       else
2053         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2054
2055       scalar_cost += stmt_cost;
2056     }
2057
2058   /* Complete the target-specific cost calculation.  */
2059   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2060                &vec_inside_cost, &vec_epilogue_cost);
2061
2062   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2063
2064   if (dump_enabled_p ())
2065     {
2066       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2067       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2068                    vec_inside_cost);
2069       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2070       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2071       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d", scalar_cost);
2072     }
2073
2074   /* Vectorization is profitable if its cost is less than the cost of the
2075      scalar version.  */
2076   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2077     return false;
2078
2079   return true;
2080 }
2081
2082 /* Check if the basic block can be vectorized.  */
2083
2084 static bb_vec_info
2085 vect_slp_analyze_bb_1 (basic_block bb)
2086 {
2087   bb_vec_info bb_vinfo;
2088   vec<slp_instance> slp_instances;
2089   slp_instance instance;
2090   int i;
2091   int min_vf = 2;
2092
2093   bb_vinfo = new_bb_vec_info (bb);
2094   if (!bb_vinfo)
2095     return NULL;
2096
2097   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2098     {
2099       if (dump_enabled_p ())
2100         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2101                          "not vectorized: unhandled data-ref in basic "
2102                          "block.\n");
2103
2104       destroy_bb_vec_info (bb_vinfo);
2105       return NULL;
2106     }
2107
2108   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2109     {
2110       if (dump_enabled_p ())
2111         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2112                          "not vectorized: not enough data-refs in "
2113                          "basic block.\n");
2114
2115       destroy_bb_vec_info (bb_vinfo);
2116       return NULL;
2117     }
2118
2119   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2120     {
2121       if (dump_enabled_p ())
2122         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2123                          "not vectorized: unhandled data access in "
2124                          "basic block.\n");
2125
2126       destroy_bb_vec_info (bb_vinfo);
2127       return NULL;
2128     }
2129
2130   vect_pattern_recog (NULL, bb_vinfo);
2131
2132   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2133     {
2134       if (dump_enabled_p ())
2135         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2136                          "not vectorized: unhandled data dependence "
2137                          "in basic block.\n");
2138 
2139       destroy_bb_vec_info (bb_vinfo);
2140       return NULL;
2141     }
2142
2143   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2144     {
2145       if (dump_enabled_p ())
2146         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2147                          "not vectorized: bad data alignment in basic "
2148                          "block.\n");
2149
2150       destroy_bb_vec_info (bb_vinfo);
2151       return NULL;
2152     }
2153
2154   /* Check the SLP opportunities in the basic block, analyze and build SLP
2155      trees.  */
2156   if (!vect_analyze_slp (NULL, bb_vinfo))
2157     {
2158       if (dump_enabled_p ())
2159         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2160                          "not vectorized: failed to find SLP opportunities "
2161                          "in basic block.\n");
2162
2163       destroy_bb_vec_info (bb_vinfo);
2164       return NULL;
2165     }
2166
2167   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2168
2169   /* Mark all the statements that we want to vectorize as pure SLP and
2170      relevant.  */
2171   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2172     {
2173       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2174       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2175     }
2176
2177   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2178     {
2179       if (dump_enabled_p ())
2180         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2181                          "not vectorized: unsupported alignment in basic "
2182                          "block.\n");
2183       destroy_bb_vec_info (bb_vinfo);
2184       return NULL;
2185     }
2186
2187   if (!vect_slp_analyze_operations (bb_vinfo))
2188     {
2189       if (dump_enabled_p ())
2190         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2191                          "not vectorized: bad operation in basic block.\n");
2192
2193       destroy_bb_vec_info (bb_vinfo);
2194       return NULL;
2195     }
2196
2197   /* Cost model: check if the vectorization is worthwhile.  */
2198   if (flag_vect_cost_model
2199       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2200     {
2201       if (dump_enabled_p ())
2202         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2203                          "not vectorized: vectorization is not "
2204                          "profitable.\n");
2205
2206       destroy_bb_vec_info (bb_vinfo);
2207       return NULL;
2208     }
2209
2210   if (dump_enabled_p ())
2211     dump_printf_loc (MSG_NOTE, vect_location,
2212                      "Basic block will be vectorized using SLP\n");
2213
2214   return bb_vinfo;
2215 }
2216
2217
2218 bb_vec_info
2219 vect_slp_analyze_bb (basic_block bb)
2220 {
2221   bb_vec_info bb_vinfo;
2222   int insns = 0;
2223   gimple_stmt_iterator gsi;
2224   unsigned int vector_sizes;
2225
2226   if (dump_enabled_p ())
2227     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2228
2229   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2230     {
2231       gimple stmt = gsi_stmt (gsi);
2232       if (!is_gimple_debug (stmt)
2233           && !gimple_nop_p (stmt)
2234           && gimple_code (stmt) != GIMPLE_LABEL)
2235         insns++;
2236     }
2237
2238   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2239     {
2240       if (dump_enabled_p ())
2241         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2242                          "not vectorized: too many instructions in "
2243                          "basic block.\n");
2244
2245       return NULL;
2246     }
2247
2248   /* Autodetect the first vector size we try.  */
2249   current_vector_size = 0;
2250   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2251
2252   while (1)
2253     {
2254       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2255       if (bb_vinfo)
2256         return bb_vinfo;
2257
2258       destroy_bb_vec_info (bb_vinfo);
2259
2260       vector_sizes &= ~current_vector_size;
2261       if (vector_sizes == 0
2262           || current_vector_size == 0)
2263         return NULL;
2264
2265       /* Try the next biggest vector size.  */
2266       current_vector_size = 1 << floor_log2 (vector_sizes);
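      /* VECTOR_SIZES is a bitmask of the sizes the target supports; e.g.,
         with the 16- and 32-byte bits still set (0x30), floor_log2 picks
         the 32-byte size first.  */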
2267       if (dump_enabled_p ())
2268         dump_printf_loc (MSG_NOTE, vect_location,
2269                          "***** Re-trying analysis with "
2270                          "vector size %d\n", current_vector_size);
2271     }
2272 }
2273
2274
2275 /* SLP costs are calculated according to the SLP instance unrolling factor
2276    (i.e., the number of created vector stmts depends on the unrolling factor).
2277    However, the actual number of vector stmts for every SLP node depends on
2278    VF, which is set later in vect_analyze_operations ().  Hence, SLP costs
2279    should be updated.  In this function we assume that the inside costs
2280    calculated in vect_model_xxx_cost are linear in ncopies.  */
2281
2282 void
2283 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2284 {
2285   unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2286   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2287   slp_instance instance;
2288   stmt_vector_for_cost body_cost_vec;
2289   stmt_info_for_cost *si;
2290   void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2291
2292   if (dump_enabled_p ())
2293     dump_printf_loc (MSG_NOTE, vect_location,
2294                      "=== vect_update_slp_costs_according_to_vf ===");
2295
2296   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2297     {
2298       /* We assume that costs are linear in ncopies.  */
2299       int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
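      /* E.g., VF == 8 with an instance unrolling factor of 2 means each
         recorded body cost is counted 4 times.  */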
2300
2301       /* Record the instance's instructions in the target cost model.
2302          This was delayed until here because the count of instructions
2303          isn't known beforehand.  */
2304       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2305
2306       FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2307         (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2308                               vinfo_for_stmt (si->stmt), si->misalign,
2309                               vect_body);
2310     }
2311 }
2312
2313
2314 /* For constant and loop invariant defs of SLP_NODE this function returns
2315    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2316    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2317    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2318    REDUC_INDEX is the index of the reduction operand in the statements,
2319    or -1 if there is no reduction operand.  */
2320
2321 static void
2322 vect_get_constant_vectors (tree op, slp_tree slp_node,
2323                            vec<tree> *vec_oprnds,
2324                            unsigned int op_num, unsigned int number_of_vectors,
2325                            int reduc_index)
2326 {
2327   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2328   gimple stmt = stmts[0];
2329   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2330   unsigned nunits;
2331   tree vec_cst;
2332   tree *elts;
2333   unsigned j, number_of_places_left_in_vector;
2334   tree vector_type;
2335   tree vop;
2336   int group_size = stmts.length ();
2337   unsigned int vec_num, i;
2338   unsigned number_of_copies = 1;
2339   vec<tree> voprnds;
2340   voprnds.create (number_of_vectors);
2341   bool constant_p, is_store;
2342   tree neutral_op = NULL;
2343   enum tree_code code = gimple_expr_code (stmt);
2344   gimple def_stmt;
2345   struct loop *loop;
2346   gimple_seq ctor_seq = NULL;
2347
2348   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2349       && reduc_index != -1)
2350     {
2351       op_num = reduc_index - 1;
2352       op = gimple_op (stmt, reduc_index);
2353       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2354          we need either neutral operands or the original operands.  See
2355          get_initial_def_for_reduction() for details.  */
2356       switch (code)
2357         {
2358           case WIDEN_SUM_EXPR:
2359           case DOT_PROD_EXPR:
2360           case PLUS_EXPR:
2361           case MINUS_EXPR:
2362           case BIT_IOR_EXPR:
2363           case BIT_XOR_EXPR:
2364              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2365                neutral_op = build_real (TREE_TYPE (op), dconst0);
2366              else
2367                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2368
2369              break;
2370
2371           case MULT_EXPR:
2372              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2373                neutral_op = build_real (TREE_TYPE (op), dconst1);
2374              else
2375                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2376
2377              break;
2378
2379           case BIT_AND_EXPR:
2380             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2381             break;
2382
2383           case MAX_EXPR:
2384           case MIN_EXPR:
2385             def_stmt = SSA_NAME_DEF_STMT (op);
2386             loop = (gimple_bb (stmt))->loop_father;
2387             neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2388                                                 loop_preheader_edge (loop));
2389             break;
2390
2391           default:
2392             neutral_op = NULL;
2393         }
2394     }
2395
2396   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2397     {
2398       is_store = true;
2399       op = gimple_assign_rhs1 (stmt);
2400     }
2401   else
2402     is_store = false;
2403
2404   gcc_assert (op);
2405
2406   if (CONSTANT_CLASS_P (op))
2407     constant_p = true;
2408   else
2409     constant_p = false;
2410
2411   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2412   gcc_assert (vector_type);
2413   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2414
2415   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2416      created vectors. It is greater than 1 if unrolling is performed.
2417
2418      For example, we have two scalar operands, s1 and s2 (e.g., group of
2419      strided accesses of size two), while NUNITS is four (i.e., four scalars
2420      of this type can be packed in a vector).  The output vector will contain
2421      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2422      will be 2).
2423
2424      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2425      containing the operands.
2426
2427      For example, NUNITS is four as before, and the group size is 8
2428      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2429      {s5, s6, s7, s8}.  */
2430
2431   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2432
2433   number_of_places_left_in_vector = nunits;
2434   elts = XALLOCAVEC (tree, nunits);
2435   for (j = 0; j < number_of_copies; j++)
2436     {
2437       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2438         {
2439           if (is_store)
2440             op = gimple_assign_rhs1 (stmt);
2441           else
2442             {
2443               switch (code)
2444                 {
2445                   case COND_EXPR:
2446                     if (op_num == 0 || op_num == 1)
2447                       {
2448                         tree cond = gimple_assign_rhs1 (stmt);
2449                         op = TREE_OPERAND (cond, op_num);
2450                       }
2451                     else
2452                       {
2453                         if (op_num == 2)
2454                           op = gimple_assign_rhs2 (stmt);
2455                         else
2456                           op = gimple_assign_rhs3 (stmt);
2457                       }
2458                     break;
2459
2460                   case CALL_EXPR:
2461                     op = gimple_call_arg (stmt, op_num);
2462                     break;
2463
2464                   case LSHIFT_EXPR:
2465                   case RSHIFT_EXPR:
2466                   case LROTATE_EXPR:
2467                   case RROTATE_EXPR:
2468                     op = gimple_op (stmt, op_num + 1);
2469                     /* Unlike the other binary operators, shifts/rotates have
2470                        the shift count being int, instead of the same type as
2471                        the lhs, so make sure the scalar is the right type if
2472                        we are dealing with vectors of
2473                        long long/long/short/char.  */
2474                     if (op_num == 1 && constant_p)
2475                       op = fold_convert (TREE_TYPE (vector_type), op);
2476                     break;
2477
2478                   default:
2479                     op = gimple_op (stmt, op_num + 1);
2480                     break;
2481                 }
2482             }
2483
2484           if (reduc_index != -1)
2485             {
2486               loop = (gimple_bb (stmt))->loop_father;
2487               def_stmt = SSA_NAME_DEF_STMT (op);
2488
2489               gcc_assert (loop);
2490
2491               /* Get the def before the loop.  In a reduction chain we have
2492                  only one initial value.  */
2493               if ((j != (number_of_copies - 1)
2494                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2495                        && i != 0))
2496                   && neutral_op)
2497                 op = neutral_op;
2498               else
2499                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2500                                             loop_preheader_edge (loop));
2501             }
2502
2503           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2504           number_of_places_left_in_vector--;
2505           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2506             {
2507               if (constant_p)
2508                 {
2509                   op = fold_unary (VIEW_CONVERT_EXPR,
2510                                    TREE_TYPE (vector_type), op);
2511                   gcc_assert (op && CONSTANT_CLASS_P (op));
2512                 }
2513               else
2514                 {
2515                   tree new_temp
2516                     = make_ssa_name (TREE_TYPE (vector_type), NULL);
2517                   gimple init_stmt;
2518                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2519                                op);               
2520                   init_stmt
2521                     = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2522                                                     new_temp, op, NULL_TREE);
2523                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2524                   op = new_temp;
2525                 }
2526             }
2527           elts[number_of_places_left_in_vector] = op;
2528
2529           if (number_of_places_left_in_vector == 0)
2530             {
2531               number_of_places_left_in_vector = nunits;
2532
2533               if (constant_p)
2534                 vec_cst = build_vector (vector_type, elts);
2535               else
2536                 {
2537                   vec<constructor_elt, va_gc> *v;
2538                   unsigned k;
2539                   vec_alloc (v, nunits);
2540                   for (k = 0; k < nunits; ++k)
2541                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2542                   vec_cst = build_constructor (vector_type, v);
2543                 }
2544               voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2545                                                     vector_type, NULL));
2546               if (ctor_seq != NULL)
2547                 {
2548                   gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2549                   gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2550                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2551                                                         GSI_SAME_STMT);
2552                   ctor_seq = NULL;
2553                 }
2554             }
2555         }
2556     }
2557
2558   /* Since the vectors are created in reverse order, we should invert
2559      them.  */
2560   vec_num = voprnds.length ();
2561   for (j = vec_num; j != 0; j--)
2562     {
2563       vop = voprnds[j - 1];
2564       vec_oprnds->quick_push (vop);
2565     }
2566
2567   voprnds.release ();
2568
2569   /* If VF is greater than the unrolling factor needed for the SLP group of
2570      stmts, the NUMBER_OF_VECTORS to be created is greater than
2571      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, so we have to
2572      replicate the vectors.  */
2573   while (number_of_vectors > vec_oprnds->length ())
2574     {
2575       tree neutral_vec = NULL;
2576
2577       if (neutral_op)
2578         {
2579           if (!neutral_vec)
2580             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2581
2582           vec_oprnds->quick_push (neutral_vec);
2583         }
2584       else
2585         {
2586           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2587             vec_oprnds->quick_push (vop);
2588         }
2589     }
2590 }
2591
2592
2593 /* Get vectorized definitions from SLP_NODE that contains corresponding
2594    vectorized def-stmts.  */
2595
2596 static void
2597 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2598 {
2599   tree vec_oprnd;
2600   gimple vec_def_stmt;
2601   unsigned int i;
2602
2603   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2604
2605   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2606     {
2607       gcc_assert (vec_def_stmt);
2608       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2609       vec_oprnds->quick_push (vec_oprnd);
2610     }
2611 }
2612
2613
2614 /* Get vectorized definitions for SLP_NODE.
2615    If the scalar definitions are loop invariants or constants, collect them and
2616    call vect_get_constant_vectors() to create vector stmts.
2617    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2618    must be stored in the corresponding child of SLP_NODE, and we call
2619    vect_get_slp_vect_defs () to retrieve them.  */
2620
2621 void
2622 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2623                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2624 {
2625   gimple first_stmt;
2626   int number_of_vects = 0, i;
2627   unsigned int child_index = 0;
2628   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2629   slp_tree child = NULL;
2630   vec<tree> vec_defs;
2631   tree oprnd;
2632   bool vectorized_defs;
2633
2634   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2635   FOR_EACH_VEC_ELT (ops, i, oprnd)
2636     {
2637       /* For each operand we check if it has vectorized definitions in a child
2638          node or we need to create them (for invariants and constants).  We
2639          check if the LHS of the first stmt of the next child matches OPRND.
2640          If it does, we found the correct child.  Otherwise, we call
2641          vect_get_constant_vectors () and do not advance CHILD_INDEX, in
2642          order to check this child node for the next operand.  */
2643       vectorized_defs = false;
2644       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2645         {
2646           child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
2647
2648           /* We have to check both pattern and original def, if available.  */
2649           gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2650           gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2651
2652           if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2653               || (related
2654                   && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2655             {
2656               /* The number of vector defs is determined by the number of
2657                  vector statements in the node from which we get those
2658                  statements.  */
2659               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2660               vectorized_defs = true;
2661               child_index++;
2662             }
2663         }
2664
2665       if (!vectorized_defs)
2666         {
2667           if (i == 0)
2668             {
2669               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2670               /* Number of vector stmts was calculated according to LHS in
2671                  vect_schedule_slp_instance (), fix it by replacing LHS with
2672                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2673                  details.  */
2674               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2675                                              &rhs_size_unit);
2676               if (rhs_size_unit != lhs_size_unit)
2677                 {
2678                   number_of_vects *= rhs_size_unit;
2679                   number_of_vects /= lhs_size_unit;
2680                 }
2681             }
2682         }
2683
2684       /* Allocate memory for vectorized defs.  */
2685       vec_defs = vNULL;
2686       vec_defs.create (number_of_vects);
2687
2688       /* For reduction defs we call vect_get_constant_vectors (), since we are
2689          looking for initial loop invariant values.  */
2690       if (vectorized_defs && reduc_index == -1)
2691         /* The defs are already vectorized.  */
2692         vect_get_slp_vect_defs (child, &vec_defs);
2693       else
2694         /* Build vectors from scalar defs.  */
2695         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2696                                    number_of_vects, reduc_index);
2697
2698       vec_oprnds->quick_push (vec_defs);
2699
2700       /* For reductions, we only need initial values.  */
2701       if (reduc_index != -1)
2702         return;
2703     }
2704 }
2705
2706
2707 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2708    building a vector of type MASK_TYPE from it) and two input vectors placed in
2709    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2710    shifting by STRIDE elements of DR_CHAIN for every copy.
2711    (STRIDE is the number of vectorized stmts for NODE divided by the number of
2712    copies).
2713    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
2714    where the created stmts must be inserted.  */
2715
2716 static inline void
2717 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2718                            tree mask, int first_vec_indx, int second_vec_indx,
2719                            gimple_stmt_iterator *gsi, slp_tree node,
2720                            tree vectype, vec<tree> dr_chain,
2721                            int ncopies, int vect_stmts_counter)
2722 {
2723   tree perm_dest;
2724   gimple perm_stmt = NULL;
2725   stmt_vec_info next_stmt_info;
2726   int i, stride;
2727   tree first_vec, second_vec, data_ref;
2728
2729   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
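  /* E.g., 8 vector stmts for NODE with NCOPIES == 2 gives a stride of 4:
     copy I of permutation COUNTER is stored in slot I * 4 + COUNTER.  */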
2730
2731   /* Initialize the vect stmts of NODE to properly insert the generated
2732      stmts later.  */
2733   for (i = SLP_TREE_VEC_STMTS (node).length ();
2734        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2735     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2736
2737   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2738   for (i = 0; i < ncopies; i++)
2739     {
2740       first_vec = dr_chain[first_vec_indx];
2741       second_vec = dr_chain[second_vec_indx];
2742
2743       /* Generate the permute statement.  */
2744       perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2745                                                 first_vec, second_vec, mask);
2746       data_ref = make_ssa_name (perm_dest, perm_stmt);
2747       gimple_set_lhs (perm_stmt, data_ref);
2748       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2749
2750       /* Store the vector statement in NODE.  */
2751       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2752
2753       first_vec_indx += stride;
2754       second_vec_indx += stride;
2755     }
2756
2757   /* Mark the scalar stmt as vectorized.  */
2758   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2759   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2760 }
2761
2762
2763 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2764    return in CURRENT_MASK_ELEMENT its equivalent in target specific
2765    representation.  Check that the mask is valid and return FALSE if not.
2766    Set *NEED_NEXT_VECTOR to TRUE if the permutation requires moving to
2767    the next vector, i.e., the current first vector is not needed.  */
2768
2769 static bool
2770 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2771                        int mask_nunits, bool only_one_vec, int index,
2772                        unsigned char *mask, int *current_mask_element,
2773                        bool *need_next_vector, int *number_of_mask_fixes,
2774                        bool *mask_fixed, bool *needs_first_vector)
2775 {
2776   int i;
2777
2778   /* Convert to target specific representation.  */
2779   *current_mask_element = first_mask_element + m;
2780   /* Adjust the value in case it's a mask for second and third vectors.  */
2781   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
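  /* NUMBER_OF_MASK_FIXES counts how often the window of input vectors has
     been shifted for the current mask (it starts at 1, i.e. no shift);
     elements are rebased so that index 0 refers to the current first
     input vector.  */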
2782
2783   if (*current_mask_element < mask_nunits)
2784     *needs_first_vector = true;
2785
2786   /* We have only one input vector to permute but the mask accesses values in
2787      the next vector as well.  */
2788   if (only_one_vec && *current_mask_element >= mask_nunits)
2789     {
2790       if (dump_enabled_p ())
2791         {
2792           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2793                            "permutation requires at least two vectors ");
2794           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2795         }
2796
2797       return false;
2798     }
2799
2800   /* The mask requires the next vector.  */
2801   if (*current_mask_element >= mask_nunits * 2)
2802     {
2803       if (*needs_first_vector || *mask_fixed)
2804         {
2805           /* We either need the first vector too or have already moved to the
2806              next vector. In both cases, this permutation needs three
2807              vectors.  */
2808           if (dump_enabled_p ())
2809             {
2810               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2811                                "permutation requires at "
2812                                "least three vectors ");
2813               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2814             }
2815
2816           return false;
2817         }
2818
2819       /* We move to the next vector, dropping the first one and working with
2820          the second and the third - we need to adjust the values of the mask
2821          accordingly.  */
2822       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2823
2824       for (i = 0; i < index; i++)
2825         mask[i] -= mask_nunits * *number_of_mask_fixes;
2826
2827       (*number_of_mask_fixes)++;
2828       *mask_fixed = true;
2829     }
2830
2831   *need_next_vector = *mask_fixed;
2832
2833   /* This was the last element of this mask. Start a new one.  */
2834   if (index == mask_nunits - 1)
2835     {
2836       *number_of_mask_fixes = 1;
2837       *mask_fixed = false;
2838       *needs_first_vector = false;
2839     }
2840
2841   return true;
2842 }
2843
2844
2845 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2846    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2847    permute statements for SLP_NODE_INSTANCE.  */
2848 bool
2849 vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
2850                               gimple_stmt_iterator *gsi, int vf,
2851                               slp_instance slp_node_instance, bool analyze_only)
2852 {
2853   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2854   tree mask_element_type = NULL_TREE, mask_type;
2855   int i, j, k, nunits, vec_index = 0, scalar_index;
2856   slp_tree node;
2857   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2858   gimple next_scalar_stmt;
2859   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2860   int first_mask_element;
2861   int index, unroll_factor, current_mask_element, ncopies;
2862   unsigned char *mask;
2863   bool only_one_vec = false, need_next_vector = false;
2864   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2865   int number_of_mask_fixes = 1;
2866   bool mask_fixed = false;
2867   bool needs_first_vector = false;
2868   enum machine_mode mode;
2869
2870   mode = TYPE_MODE (vectype);
2871
2872   if (!can_vec_perm_p (mode, false, NULL))
2873     {
2874       if (dump_enabled_p ())
2875         {
2876           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2877                            "no vect permute for ");
2878           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2879         }
2880       return false;
2881     }
2882
2883   /* The generic VEC_PERM_EXPR code always uses an integral type of the
2884      same size as the vector element being permuted.  */
2885   mask_element_type = lang_hooks.types.type_for_mode
2886                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2887   mask_type = get_vectype_for_scalar_type (mask_element_type);
2888   nunits = TYPE_VECTOR_SUBPARTS (vectype);
2889   mask = XALLOCAVEC (unsigned char, nunits);
2890   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2891
2892   /* The number of vector stmts to generate based only on the
2893      SLP_NODE_INSTANCE unrolling factor.  */
2894   orig_vec_stmts_num
2895     = group_size * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
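  /* E.g., in the a/b/c example below: GROUP_SIZE == 3, an unrolling factor
     of 4 and NUNITS == 4 give three vectors.  */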
2896   if (orig_vec_stmts_num == 1)
2897     only_one_vec = true;
2898
2899   /* The number of copies is determined by the final vectorization factor
2900      relative to the SLP_NODE_INSTANCE unrolling factor.  */
2901   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2902
2903   /* Generate permutation masks for every NODE.  The number of masks for
2904      each NODE is equal to GROUP_SIZE.
2905      E.g., we have a group of three nodes with three loads from the same
2906      location in each node, and the vector size is 4.  I.e., we have an
2907      a0b0c0a1b1c1... sequence and we need to create the following vectors:
2908      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2909      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2910      ...
2911
2912      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2913      The last mask is illegal since we assume two operands for permute
2914      operation, and the mask element values can't be outside that range.
2915      Hence, the last mask must be converted into {2,5,5,5}.
2916      For the first two permutations we need the first and the second input
2917      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2918      we need the second and the third vectors: {b1,c1,a2,b2} and
2919      {c2,a3,b3,c3}.  */
2920
2921   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2922     {
2923       scalar_index = 0;
2924       index = 0;
2925       vect_stmts_counter = 0;
2926       vec_index = 0;
2927       first_vec_index = vec_index++;
2928       if (only_one_vec)
2929         second_vec_index = first_vec_index;
2930       else
2931         second_vec_index = vec_index++;
2932
2933       for (j = 0; j < unroll_factor; j++)
2934         {
2935           for (k = 0; k < group_size; k++)
2936             {
2937               first_mask_element = i + j * group_size;
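              /* For the a/b/c example above with node I == 0 this yields
                 0, 3, 6, 9, each repeated GROUP_SIZE times, i.e. the masks
                 {0,0,0,3} {3,3,6,6} {6,9,9,9} before range fixing.  */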
2938               if (!vect_get_mask_element (stmt, first_mask_element, 0,
2939                                           nunits, only_one_vec, index,
2940                                           mask, &current_mask_element,
2941                                           &need_next_vector,
2942                                           &number_of_mask_fixes, &mask_fixed,
2943                                           &needs_first_vector))
2944                 return false;
2945               mask[index++] = current_mask_element;
2946
2947               if (index == nunits)
2948                 {
2949                   tree mask_vec, *mask_elts;
2950                   int l;
2951
2952                   if (!can_vec_perm_p (mode, false, mask))
2953                     {
2954                       if (dump_enabled_p ())
2955                         {
2956                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2957                                            vect_location, 
2958                                            "unsupported vect permute { ");
2959                           for (i = 0; i < nunits; ++i)
2960                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2961                                          mask[i]);
2962                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2963                         }
2964                       return false;
2965                     }
2966
2967                   mask_elts = XALLOCAVEC (tree, nunits);
2968                   for (l = 0; l < nunits; ++l)
2969                     mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2970                   mask_vec = build_vector (mask_type, mask_elts);
2971                   index = 0;
2972
2973                   if (!analyze_only)
2974                     {
2975                       if (need_next_vector)
2976                         {
2977                           first_vec_index = second_vec_index;
2978                           second_vec_index = vec_index;
2979                         }
2980
2981                       next_scalar_stmt
2982                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
2983
2984                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
2985                                mask_vec, first_vec_index, second_vec_index,
2986                                gsi, node, vectype, dr_chain,
2987                                ncopies, vect_stmts_counter++);
2988                     }
2989                 }
2990             }
2991         }
2992     }
2993
2994   return true;
2995 }


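/* A minimal illustrative sketch of the mask renumbering described in the
   comment above (for illustration only; nothing in GCC calls this, and the
   function name and parameters are hypothetical).  A two-operand vec_perm
   only accepts mask elements in [0, 2 * NUNITS), so an element of the
   conceptual flat mask is rebased against the first input vector actually
   passed to the permute.  */

static inline int
slp_perm_rebase_mask_element (int flat_element, int first_vec_index,
                              int nunits)
{
  /* FLAT_ELEMENT indexes the whole concatenated sequence of loaded
     vectors; subtract the vectors that precede the first input.  */
  int rebased = flat_element - first_vec_index * nunits;

  /* E.g., for the {6,9,9,9} mask above with NUNITS == 4 and the second
     vector (index 1) as first input: 6 - 4 == 2 and 9 - 4 == 5, giving
     the legal mask {2,5,5,5}.  */
  gcc_assert (rebased >= 0 && rebased < 2 * nunits);
  return rebased;
}
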
/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;
  slp_tree child;

  if (!node)
    return false;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  The number of scalar
     elements produced per vector iteration is the number of scalar elements
     in one scalar iteration (GROUP_SIZE) multiplied by VF; dividing it by
     the number of elements in a vector (NUNITS) yields the number of vector
     stmts.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
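  /* For example (numbers purely illustrative): with GROUP_SIZE == 4,
     VF == 4 and 8 elements per vector (NUNITS == 8), a node produces
     4 * 4 == 16 scalar results per vector iteration, which need
     16 / 8 == 2 vector stmts, so VEC_STMTS_SIZE == 2.  */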

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
            {
              SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
        last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
      si = gsi_for_stmt (last_store);
    }
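
  /* E.g. (illustrative), for a group of two scalar stores

       a[0] = x_1;
       a[1] = y_2;

     the vector stmts replacing the group are emitted just before
     "a[1] = y_2;", the point where all scalar operands of the group
     are known to be available.  */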

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }
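
  /* E.g. (illustrative), for a reduction chain

       s1_4 = x_5 + s0_3;
       s2_6 = x_7 + s1_4;
       s3_8 = x_9 + s2_6;

     the analysis phase marked only the stmt defining s3_8 as a reduction;
     the code above additionally marks the stmt defining s1_4, the
     GROUP_FIRST_ELEMENT of the chain.  */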

  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */
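/* For example (illustrative), a scalar call

     x_1 = foo (a_2, b_3);

   whose results are now produced by vector stmts is rewritten here to

     x_1 = 0;

   leaving a trivially dead stmt for later DCE.  */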

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple stmt, new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  slp_tree loads_node;
  unsigned int i, j, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);

      /* Clear STMT_VINFO_VEC_STMT of all loads.  With loads shared
         between SLP instances we would otherwise fail to properly
         initialize the vectorized SLP stmts and would confuse different
         load permutations.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
        STMT_VINFO_VEC_STMT
          (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts themselves.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (loop_vinfo)
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "------>SLPing statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  if (dump_enabled_p ())
    dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}