/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the location of the first statement in BB that has one,
   or UNKNOWN_LOC if no statement has a location.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple> scalar_stmts)
{
  slp_tree node;
  gimple stmt = scalar_stmts[0];
  unsigned int nops;

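  /* Determine the operand count from the group's first statement; every
     statement in an SLP group must share this shape.  A COND_EXPR holds
     its comparison as a single embedded operand that contributes two
     operands, hence the extra count.  */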
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
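/* As a purely illustrative example, in the interleaving chain a[4*i],
   a[4*i+1], a[4*i+2], a[4*i+3], the statement accessing a[4*i+2] is at
   place 2: places are 0-based positions in the chain.  */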

static int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             vec<slp_oprnd_info> *oprnds_info,
                             stmt_vector_for_cost *prologue_cost_vec,
                             stmt_vector_for_cost *body_cost_vec)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
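      /* For a GIMPLE_CALL the arguments start at operand index 3:
         operand 0 is the LHS, 1 the called function, 2 the static
         chain.  */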
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

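      /* A COND_EXPR condition is an embedded comparison: split it into
         its two operands here, and save the second one in COMPARE_RHS
         so the next iteration processes it as a separate operand.  */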
      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't find def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported defining stmt: ");
                return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node, prologue_cost_vec,
                                       body_cost_vec);
              else
                {
                  enum vect_def_type dts[2];
                  dts[0] = dt;
                  dts[1] = vect_uninitialized_def;
                  /* Not memory operation (we don't call this function for
                     loads).  */
                  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
                                          prologue_cost_vec, body_cost_vec);
                }
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
               || (!def
                   && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                           TREE_TYPE (oprnd)))
               || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = (*oprnds_info)[0];
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "Swapping operands of ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = (*oprnds_info)[0];
              oprnd1_info = (*oprnds_info)[1];
              if (i == 0)
                oprnd1_info->def_stmts.quick_push (def_stmt);
              else
                oprnd0_info->def_stmts.quick_push (def_stmt);
            }
          else
            oprnd_info->def_stmts.quick_push (def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
            }

          return false;
        }
    }

  return true;
}


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */
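/* As a purely illustrative example, a pair of grouped stores
     a[i] = b[i] + c[i];
     a[i+1] = b[i+1] - c[i+1];
   fails the isomorphism check below ("different operation in stmt"),
   whereas two '+' statements would build a node with one child per
   operand.  */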

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     vec<int> *load_permutation,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor, bool *loads_permuted,
                     stmt_vector_for_cost *prologue_cost_vec,
                     stmt_vector_for_cost *body_cost_vec)
{
  unsigned int i;
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  vec<slp_oprnd_info> oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }

          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.");
                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.");
                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info,
                                                prologue_cost_vec,
                                                body_cost_vec))
                {
                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
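              /* The unrolling factor is the number of times the group
                 must be repeated to fill whole vectors: e.g., with a
                 group of 2 loads and 4 elements per vector it is 2.  */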
              unsigned unrolling_factor
                = least_common_multiple
                    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
                 and no gap between the groups when we need to load
                 multiple groups at once.
                 ???  We should enhance this to only disallow gaps
                 inside vectors.  */
              if ((unrolling_factor > 1
                   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: grouped "
                                       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }

              old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              /* In some cases a group of loads is just the same load
                 repeated N times.  Only analyze its cost once.  */
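              /* OLD_FIRST_LOAD remembers the chain head seen for the
                 previous statement, so the cost of a repeated load is
                 counted only at its first occurrence.  */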
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: unsupported "
                                           "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }

                      vect_free_oprnd_info (oprnds_info);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node,
                                        prologue_cost_vec, body_cost_vec);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              load_permutation->safe_push (load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              vect_free_oprnd_info (oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }

                  vect_free_oprnd_info (oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info, prologue_cost_vec,
                                            body_cost_vec))
            {
              vect_free_oprnd_info (oprnds_info);
              return false;
            }
        }
    }

  /* Grouped loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      loads->safe_push (*node);
      if (permutation)
        {
          gimple first_stmt = stmts[0];
          *loads_permuted = true;
          (void) record_stmt_cost (body_cost_vec, group_size, vec_perm,
                                   vinfo_for_stmt (first_stmt), 0, vect_body);
        }
      else
        {
          /* We do not check complex-number chains here; set
             LOADS_PERMUTED so that vect_supported_load_permutation_p
             checks them later.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      vect_free_oprnd_info (oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted,
                                   prologue_cost_vec, body_cost_vec))
        {
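          /* If CHILD was created, ownership of DEF_STMTS moved to it and
             vect_free_slp_tree will release the vector; clear our
             reference so vect_free_oprnd_info does not release it a
             second time.  */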
          if (child)
            oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      oprnd_info->def_stmts.create (0);
      SLP_TREE_CHILDREN (*node).quick_push (child);
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
  gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  vec<slp_tree> sorted_loads = vNULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
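  /* Since the supported permutation repeats each load index GROUP_SIZE
     times (see vect_supported_load_permutation_p), it is enough to read
     one permutation entry per GROUP_SIZE-long run, i.e. one entry per
     load node.  */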
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
       && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
       i += group_size, j++)
    {
      gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data "
                               "permutation ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                scalar_stmt, 0);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads.create (group_size);
  for (i = 0; i < group_size; i++)
    sorted_loads.safe_push (tmp_loads[i]);

  SLP_INSTANCE_LOADS (instance).release ();
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */
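/* For example, with PERMUTATION {2, 0, 1} the statement at position 0
   moves to position 2, position 1 to 0, and position 2 to 1 (note the
   assignment tmp_stmts[permutation[i]] = stmt below).  */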

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<int> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of grouped stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   vec<int> load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (load_permutation, i, next)
        dump_printf (MSG_NOTE, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
        return false;

      stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as follows:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
                      other_node_first =
                                SLP_TREE_SCALAR_STMTS (other_complex_node)[0];

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is OK, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2
      && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
    {
      SLP_INSTANCE_LOADS (slp_instn).release ();
      SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance; not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = load_permutation.length () / group_size;

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = load_permutation[j];
              first_group_load_index = load_permutation[k];

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          bitmap_clear (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = load_permutation[k];
              if (bitmap_bit_p (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              bitmap_set_bit (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!bitmap_bit_p (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                         != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = SLP_TREE_SCALAR_STMTS (node)[0];
              if (first_load
                    != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                       == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "unsupported unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            first_load, 0);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
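  /* For instance, with GROUP_SIZE 2 the only supported permutation is
     0 0 1 1: each load node reads a single element of the interleaving
     chain, repeated once per scalar statement of the group.  */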
1427   if (load_permutation.length ()
1428       != (unsigned int) (group_size * group_size))
1429     return false;
1430
1431   supported = true;
1432   load_index = sbitmap_alloc (group_size);
1433   bitmap_clear (load_index);
1434   for (j = 0; j < group_size; j++)
1435     {
1436       for (i = j * group_size, k = 0;
1437            load_permutation.iterate (i, &next) && k < group_size;
1438            i++, k++)
1439        {
1440          if (i != j * group_size && next != prev)
1441           {
1442             supported = false;
1443             break;
1444           }
1445
1446          prev = next;
1447        }
1448
1449       if (bitmap_bit_p (load_index, prev))
1450         {
1451           supported = false;
1452           break;
1453         }
1454
1455       bitmap_set_bit (load_index, prev);
1456     }
1457  
1458   for (j = 0; j < group_size; j++)
1459     if (!bitmap_bit_p (load_index, j))
1460       {
1461         sbitmap_free (load_index);
1462         return false;
1463       }
1464
1465   sbitmap_free (load_index);
1466
1467   if (supported && i == group_size * group_size
1468       && vect_supported_slp_permutation_p (slp_instn))
1469     return true;
1470
1471   return false;
1472 }
1473
1474
1475 /* Find the first load in the loop that belongs to INSTANCE.
1476    When loads are in several SLP nodes, there can be a case in which the first
1477    load does not appear in the first SLP node to be transformed, causing
1478    incorrect order of statements.  Since we generate all the loads together,
1479    they must be inserted before the first load of the SLP instance and not
1480    before the first load of the first node of the instance.  */
1481
1482 static gimple
1483 vect_find_first_load_in_slp_instance (slp_instance instance)
1484 {
1485   int i, j;
1486   slp_tree load_node;
1487   gimple first_load = NULL, load;
1488
1489   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1490     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1491       first_load = get_earlier_stmt (load, first_load);
1492
1493   return first_load;
1494 }
1495
1496
1497 /* Find the last store in SLP INSTANCE.  */
1498
1499 static gimple
1500 vect_find_last_store_in_slp_instance (slp_instance instance)
1501 {
1502   int i;
1503   slp_tree node;
1504   gimple last_store = NULL, store;
1505
1506   node = SLP_INSTANCE_TREE (instance);
1507   for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
1508     last_store = get_later_stmt (store, last_store);
1509
1510   return last_store;
1511 }
1512
1513
1514 /* Analyze an SLP instance starting from a group of grouped stores.  Call
1515    vect_build_slp_tree to build a tree of packed stmts if possible.
1516    Return FALSE if it's impossible to SLP any stmt in the loop.  */
1517
1518 static bool
1519 vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1520                            gimple stmt)
1521 {
1522   slp_instance new_instance;
1523   slp_tree node;
1524   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1525   unsigned int unrolling_factor = 1, nunits;
1526   tree vectype, scalar_type = NULL_TREE;
1527   gimple next;
1528   unsigned int vectorization_factor = 0;
1529   int outside_cost = 0, ncopies_for_cost, i;
1530   unsigned int max_nunits = 0;
1531   vec<int> load_permutation;
1532   vec<slp_tree> loads;
1533   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1534   bool loads_permuted = false;
1535   vec<gimple> scalar_stmts;
1536   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1537   stmt_info_for_cost *si;
1538
1539   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1540     {
1541       if (dr)
1542         {
1543           scalar_type = TREE_TYPE (DR_REF (dr));
1544           vectype = get_vectype_for_scalar_type (scalar_type);
1545         }
1546       else
1547         {
1548           gcc_assert (loop_vinfo);
1549           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1550         }
1551
1552       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1553     }
1554   else
1555     {
1556       gcc_assert (loop_vinfo);
1557       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1558       group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
1559     }
1560
1561   if (!vectype)
1562     {
1563       if (dump_enabled_p ())
1564         {
1565           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1566                            "Build SLP failed: unsupported data-type ");
1567           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1568         }
1569
1570       return false;
1571     }
1572
1573   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1574   if (loop_vinfo)
1575     vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1576   else
1577     vectorization_factor = nunits;
1578
1579   /* Calculate the unrolling factor.  */
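  /* E.g., with NUNITS == 4 and GROUP_SIZE == 6 this is lcm (4, 6) / 6 == 2:
     two iterations of the group are needed to fill whole vectors.  */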
1580   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1581   if (unrolling_factor != 1 && !loop_vinfo)
1582     {
1583       if (dump_enabled_p ())
1584         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1585                          "Build SLP failed: unrolling required in basic"
1586                          " block SLP");
1587
1588       return false;
1589     }
1590
1591   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1592   scalar_stmts.create (group_size);
1593   next = stmt;
1594   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1595     {
1596       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1597       while (next)
1598         {
1599           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1600               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1601             scalar_stmts.safe_push (
1602                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1603           else
1604             scalar_stmts.safe_push (next);
1605           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1606         }
1607     }
1608   else
1609     {
1610       /* Collect reduction statements.  */
1611       vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1612       for (i = 0; reductions.iterate (i, &next); i++)
1613         scalar_stmts.safe_push (next);
1614     }
1615
1616   node = vect_create_new_slp_node (scalar_stmts);
1617
1618   /* Calculate the number of vector stmts to create based on the unrolling
1619      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1620      GROUP_SIZE / NUNITS otherwise).  */
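  /* E.g., with NUNITS == 4, GROUP_SIZE == 6 and hence an unrolling factor
     of 2 (see above), this gives 2 * 6 / 4 == 3 vector stmts.  */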
1621   ncopies_for_cost = unrolling_factor * group_size / nunits;
1622
1623   load_permutation.create (group_size * group_size);
1624   loads.create (group_size);
1625   prologue_cost_vec.create (10);
1626   body_cost_vec.create (10);
1627
1628   /* Build the tree for the SLP instance.  */
1629   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1630                            &outside_cost, ncopies_for_cost,
1631                            &max_nunits, &load_permutation, &loads,
1632                            vectorization_factor, &loads_permuted,
1633                            &prologue_cost_vec, &body_cost_vec))
1634     {
1635       void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1636                     : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1637
1638       /* Calculate the unrolling factor based on the smallest type.  */
1639       if (max_nunits > nunits)
1640         unrolling_factor = least_common_multiple (max_nunits, group_size)
1641                            / group_size;
1642
1643       if (unrolling_factor != 1 && !loop_vinfo)
1644         {
1645           if (dump_enabled_p ())
1646             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1647                              "Build SLP failed: unrolling required in basic"
1648                              " block SLP");
1649           vect_free_slp_tree (node);
1650           body_cost_vec.release ();
1651           prologue_cost_vec.release ();
1652           load_permutation.release ();
1653           loads.release ();
1654           return false;
1655         }
1656
1657       /* Create a new SLP instance.  */
1658       new_instance = XNEW (struct _slp_instance);
1659       SLP_INSTANCE_TREE (new_instance) = node;
1660       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1661       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1662       SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
1663       SLP_INSTANCE_LOADS (new_instance) = loads;
1664       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1665       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1666
1667       if (loads_permuted)
1668         {
1669           if (!vect_supported_load_permutation_p (new_instance, group_size,
1670                                                   load_permutation))
1671             {
1672               if (dump_enabled_p ())
1673                 {
1674                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
1675                                    "Build SLP failed: unsupported load "
1676                                    "permutation ");
1677                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1678                 }
1679
1680               vect_free_slp_instance (new_instance);
1681               prologue_cost_vec.release ();
1682               return false;
1683             }
1684
1685           SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1686              = vect_find_first_load_in_slp_instance (new_instance);
1687         }
1688       else
1689         SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();
1690
1691       /* Record the prologue costs, which were delayed until we were
1692          sure that SLP was successful.  Unlike the body costs, we know
1693          the final values now regardless of the loop vectorization factor.  */
1694       FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1695         {
1696           struct _stmt_vec_info *stmt_info
1697             = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1698           (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1699                                 si->misalign, vect_prologue);
1700         }
1701
1702       prologue_cost_vec.release ();
1703
1704       if (loop_vinfo)
1705         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
1706       else
1707         BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
1708
1709       if (dump_enabled_p ())
1710         vect_print_slp_tree (MSG_NOTE, node);
1711
1712       return true;
1713     }
1714   else
1715     {
1716       body_cost_vec.release ();
1717       prologue_cost_vec.release ();
1718     }
1719
1720   /* Failed to SLP.  */
1721   /* Free the allocated memory.  */
1722   vect_free_slp_tree (node);
1723   load_permutation.release ();
1724   loads.release ();
1725
1726   return false;
1727 }
1728
1729
1730 /* Check if there are stmts in the loop that can be vectorized using SLP.
1731    Build SLP trees of packed scalar stmts if SLP is possible.  */
1732
1733 bool
1734 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1735 {
1736   unsigned int i;
1737   vec<gimple> grouped_stores;
1738   vec<gimple> reductions = vNULL;
1739   vec<gimple> reduc_chains = vNULL;
1740   gimple first_element;
1741   bool ok = false;
1742
1743   if (dump_enabled_p ())
1744     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
1745
1746   if (loop_vinfo)
1747     {
1748       grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
1749       reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
1750       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1751     }
1752   else
1753     grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
1754
1755   /* Find SLP sequences starting from groups of grouped stores.  */
1756   FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1757     if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1758       ok = true;
1759
1760   if (bb_vinfo && !ok)
1761     {
1762       if (dump_enabled_p ())
1763         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1764                          "Failed to SLP the basic block.");
1765
1766       return false;
1767     }
1768
1769   if (loop_vinfo
1770       && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
1771     {
1772       /* Find SLP sequences starting from reduction chains.  */
1773       FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1774         if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1775           ok = true;
1776         else
1777           return false;
1778
1779       /* Don't try to vectorize SLP reductions if reduction chain was
1780          detected.  */
1781       return ok;
1782     }
1783
1784   /* Find SLP sequences starting from groups of reductions.  */
1785   if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1786       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
1787     ok = true;
1788
1789   return true;
1790 }
1791
1792
1793 /* For each possible SLP instance decide whether to SLP it and calculate the
1794    overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
1795    to SLP at least one instance.  */
1796
1797 bool
1798 vect_make_slp_decision (loop_vec_info loop_vinfo)
1799 {
1800   unsigned int i, unrolling_factor = 1;
1801   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1802   slp_instance instance;
1803   int decided_to_slp = 0;
1804
1805   if (dump_enabled_p ())
1806     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
1807
1808   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1809     {
1810       /* FORNOW: SLP if you can.  */
1811       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1812         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1813
1814       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1815          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1816          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1817       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1818       decided_to_slp++;
1819     }
1820
1821   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1822
1823   if (decided_to_slp && dump_enabled_p ())
1824     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1825                      "Decided to SLP %d instances. Unrolling factor %d",
1826                      decided_to_slp, unrolling_factor);
1827
1828   return (decided_to_slp > 0);
1829 }
1830
1831
1832 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1833    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
1834
1835 static void
1836 vect_detect_hybrid_slp_stmts (slp_tree node)
1837 {
1838   int i;
1839   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1840   gimple stmt = stmts[0];
1841   imm_use_iterator imm_iter;
1842   gimple use_stmt;
1843   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1844   slp_tree child;
1845   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1846   struct loop *loop = NULL;
1847   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1848   basic_block bb = NULL;
1849
1850   if (!node)
1851     return;
1852
1853   if (loop_vinfo)
1854     loop = LOOP_VINFO_LOOP (loop_vinfo);
1855   else
1856     bb = BB_VINFO_BB (bb_vinfo);
1857
1858   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1859     if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1860         && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1861       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1862         if (gimple_bb (use_stmt)
1863             && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1864                  || bb == gimple_bb (use_stmt))
1865             && (stmt_vinfo = vinfo_for_stmt (use_stmt))
1866             && !STMT_SLP_TYPE (stmt_vinfo)
1867             && (STMT_VINFO_RELEVANT (stmt_vinfo)
1868                 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1869             && !(gimple_code (use_stmt) == GIMPLE_PHI
1870                  && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1871                   == vect_reduction_def))
1872           vect_mark_slp_stmts (node, hybrid, i);
1873
1874   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1875     vect_detect_hybrid_slp_stmts (child);
1876 }
1877
1878
1879 /* Find stmts that must be both vectorized and SLPed.  */
1880
1881 void
1882 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1883 {
1884   unsigned int i;
1885   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1886   slp_instance instance;
1887
1888   if (dump_enabled_p ())
1889     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
1890
1891   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1892     vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1893 }
1894
1895
1896 /* Create and initialize a new bb_vec_info struct for BB, as well as
1897    stmt_vec_info structs for all the stmts in it.  */
1898
1899 static bb_vec_info
1900 new_bb_vec_info (basic_block bb)
1901 {
1902   bb_vec_info res = NULL;
1903   gimple_stmt_iterator gsi;
1904
1905   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1906   BB_VINFO_BB (res) = bb;
1907
1908   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1909     {
1910       gimple stmt = gsi_stmt (gsi);
1911       gimple_set_uid (stmt, 0);
1912       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1913     }
1914
1915   BB_VINFO_GROUPED_STORES (res).create (10);
1916   BB_VINFO_SLP_INSTANCES (res).create (2);
1917   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
1918
1919   bb->aux = res;
1920   return res;
1921 }
1922
1923
1924 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1925    stmts in the basic block.  */
1926
1927 static void
1928 destroy_bb_vec_info (bb_vec_info bb_vinfo)
1929 {
1930   vec<slp_instance> slp_instances;
1931   slp_instance instance;
1932   basic_block bb;
1933   gimple_stmt_iterator si;
1934   unsigned i;
1935
1936   if (!bb_vinfo)
1937     return;
1938
1939   bb = BB_VINFO_BB (bb_vinfo);
1940
1941   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1942     {
1943       gimple stmt = gsi_stmt (si);
1944       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1945
1946       if (stmt_info)
1947         /* Free stmt_vec_info.  */
1948         free_stmt_vec_info (stmt);
1949     }
1950
1951   free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1952   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
1953   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
1954   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1955   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1956     vect_free_slp_instance (instance);
1957   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
1958   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1959   free (bb_vinfo);
1960   bb->aux = NULL;
1961 }
1962
1963
1964 /* Analyze the statements contained in the SLP tree NODE after recursively
1965    analyzing the subtree.  Return TRUE if the operations are supported.  */
1966
1967 static bool
1968 vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1969 {
1970   bool dummy;
1971   int i;
1972   gimple stmt;
1973   slp_tree child;
1974
1975   if (!node)
1976     return true;
1977
1978   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1979     if (!vect_slp_analyze_node_operations (bb_vinfo, child))
1980       return false;
1981
1982   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1983     {
1984       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1985       gcc_assert (stmt_info);
1986       gcc_assert (PURE_SLP_STMT (stmt_info));
1987
1988       if (!vect_analyze_stmt (stmt, &dummy, node))
1989         return false;
1990     }
1991
1992   return true;
1993 }
1994
1995
1996 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
1997    operations are supported. */
1998
1999 static bool
2000 vect_slp_analyze_operations (bb_vec_info bb_vinfo)
2001 {
2002   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2003   slp_instance instance;
2004   int i;
2005
2006   for (i = 0; slp_instances.iterate (i, &instance); )
2007     {
2008       if (!vect_slp_analyze_node_operations (bb_vinfo,
2009                                              SLP_INSTANCE_TREE (instance)))
2010         {
2011           vect_free_slp_instance (instance);
2012           slp_instances.ordered_remove (i);
2013         }
2014       else
2015         i++;
2016     }
2017
2018   if (!slp_instances.length ())
2019     return false;
2020
2021   return true;
2022 }
2023
2024 /* Check if vectorization of the basic block is profitable.  */
2025
2026 static bool
2027 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2028 {
2029   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2030   slp_instance instance;
2031   int i, j;
2032   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2033   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2034   unsigned int stmt_cost;
2035   gimple stmt;
2036   gimple_stmt_iterator si;
2037   basic_block bb = BB_VINFO_BB (bb_vinfo);
2038   void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
2039   stmt_vec_info stmt_info = NULL;
2040   stmt_vector_for_cost body_cost_vec;
2041   stmt_info_for_cost *ci;
2042
2043   /* Calculate vector costs.  */
2044   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2045     {
2046       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2047
2048       FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
2049         {
2050           stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2051           (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2052                                 stmt_info, ci->misalign, vect_body);
2053         }
2054     }
2055
2056   /* Calculate scalar cost.  */
2057   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2058     {
2059       stmt = gsi_stmt (si);
2060       stmt_info = vinfo_for_stmt (stmt);
2061
2062       if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2063           || !PURE_SLP_STMT (stmt_info))
2064         continue;
2065
2066       if (STMT_VINFO_DATA_REF (stmt_info))
2067         {
2068           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2069             stmt_cost = vect_get_stmt_cost (scalar_load);
2070           else
2071             stmt_cost = vect_get_stmt_cost (scalar_store);
2072         }
2073       else
2074         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2075
2076       scalar_cost += stmt_cost;
2077     }
2078
2079   /* Complete the target-specific cost calculation.  */
2080   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2081                &vec_inside_cost, &vec_epilogue_cost);
2082
2083   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2084
2085   if (dump_enabled_p ())
2086     {
2087       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2088       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2089                    vec_inside_cost);
2090       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2091       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2092       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d", scalar_cost);
2093     }
2094
2095   /* Vectorization is profitable if its cost is less than the cost of the
2096      scalar version.  */
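  /* E.g. (illustrative numbers): with a vector prologue cost of 1, epilogue
     cost of 1 and inside cost of 6 against a scalar cost of 8, we get
     2 + 6 >= 8 and the block is left scalar.  */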
2097   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2098     return false;
2099
2100   return true;
2101 }
2102
2103 /* Check if the basic block can be vectorized.  */
2104
2105 static bb_vec_info
2106 vect_slp_analyze_bb_1 (basic_block bb)
2107 {
2108   bb_vec_info bb_vinfo;
2109   vec<slp_instance> slp_instances;
2110   slp_instance instance;
2111   int i;
2112   int min_vf = 2;
2113
2114   bb_vinfo = new_bb_vec_info (bb);
2115   if (!bb_vinfo)
2116     return NULL;
2117
2118   if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
2119     {
2120       if (dump_enabled_p ())
2121         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2122                          "not vectorized: unhandled data-ref in basic "
2123                          "block.\n");
2124
2125       destroy_bb_vec_info (bb_vinfo);
2126       return NULL;
2127     }
2128
2129   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2130     {
2131       if (dump_enabled_p ())
2132         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133                          "not vectorized: not enough data-refs in "
2134                          "basic block.\n");
2135
2136       destroy_bb_vec_info (bb_vinfo);
2137       return NULL;
2138     }
2139
2140   if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2141     {
2142       if (dump_enabled_p ())
2143         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2144                          "not vectorized: unhandled data access in "
2145                          "basic block.\n");
2146
2147       destroy_bb_vec_info (bb_vinfo);
2148       return NULL;
2149     }
2150
2151   vect_pattern_recog (NULL, bb_vinfo);
2152
2153   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2154     {
2155       if (dump_enabled_p ())
2156         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2157                          "not vectorized: unhandled data dependence "
2158                          "in basic block.\n");
2159
2160       destroy_bb_vec_info (bb_vinfo);
2161       return NULL;
2162     }
2163
2164   if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2165     {
2166       if (dump_enabled_p ())
2167         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2168                          "not vectorized: bad data alignment in basic "
2169                          "block.\n");
2170
2171       destroy_bb_vec_info (bb_vinfo);
2172       return NULL;
2173     }
2174
2175   /* Check the SLP opportunities in the basic block, analyze and build SLP
2176      trees.  */
2177   if (!vect_analyze_slp (NULL, bb_vinfo))
2178     {
2179       if (dump_enabled_p ())
2180         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2181                          "not vectorized: failed to find SLP opportunities "
2182                          "in basic block.\n");
2183
2184       destroy_bb_vec_info (bb_vinfo);
2185       return NULL;
2186     }
2187
2188   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2189
2190   /* Mark all the statements that we want to vectorize as pure SLP and
2191      relevant.  */
2192   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2193     {
2194       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2195       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2196     }
2197
2198   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
2199     {
2200       if (dump_enabled_p ())
2201         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2202                          "not vectorized: unsupported alignment in basic "
2203                          "block.\n");
2204       destroy_bb_vec_info (bb_vinfo);
2205       return NULL;
2206     }
2207
2208   if (!vect_slp_analyze_operations (bb_vinfo))
2209     {
2210       if (dump_enabled_p ())
2211         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2212                          "not vectorized: bad operation in basic block.\n");
2213
2214       destroy_bb_vec_info (bb_vinfo);
2215       return NULL;
2216     }
2217
2218   /* Cost model: check if the vectorization is worthwhile.  */
2219   if (flag_vect_cost_model
2220       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2221     {
2222       if (dump_enabled_p ())
2223         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2224                          "not vectorized: vectorization is not "
2225                          "profitable.\n");
2226
2227       destroy_bb_vec_info (bb_vinfo);
2228       return NULL;
2229     }
2230
2231   if (dump_enabled_p ())
2232     dump_printf_loc (MSG_NOTE, vect_location,
2233                      "Basic block will be vectorized using SLP\n");
2234
2235   return bb_vinfo;
2236 }
2237
2238
2239 bb_vec_info
2240 vect_slp_analyze_bb (basic_block bb)
2241 {
2242   bb_vec_info bb_vinfo;
2243   int insns = 0;
2244   gimple_stmt_iterator gsi;
2245   unsigned int vector_sizes;
2246
2247   if (dump_enabled_p ())
2248     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2249
2250   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2251     {
2252       gimple stmt = gsi_stmt (gsi);
2253       if (!is_gimple_debug (stmt)
2254           && !gimple_nop_p (stmt)
2255           && gimple_code (stmt) != GIMPLE_LABEL)
2256         insns++;
2257     }
2258
2259   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2260     {
2261       if (dump_enabled_p ())
2262         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2263                          "not vectorized: too many instructions in "
2264                          "basic block.\n");
2265
2266       return NULL;
2267     }
2268
2269   /* Autodetect first vector size we try.  */
2270   current_vector_size = 0;
2271   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2272
2273   while (1)
2274     {
2275       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2276       if (bb_vinfo)
2277         return bb_vinfo;
2278
2279       destroy_bb_vec_info (bb_vinfo);
2280
2281       vector_sizes &= ~current_vector_size;
2282       if (vector_sizes == 0
2283           || current_vector_size == 0)
2284         return NULL;
2285
2286       /* Try the next biggest vector size.  */
2287       current_vector_size = 1 << floor_log2 (vector_sizes);
2288       if (dump_enabled_p ())
2289         dump_printf_loc (MSG_NOTE, vect_location,
2290                          "***** Re-trying analysis with "
2291                          "vector size %d\n", current_vector_size);
2292     }
2293 }
2294
2295
2296 /* SLP costs are calculated according to the SLP instance unrolling factor
2297    (i.e., the number of created vector stmts depends on the unrolling factor).
2298    However, the actual number of vector stmts for every SLP node depends on
2299    VF, which is set later in vect_analyze_operations ().  Hence, SLP costs
2300    should be updated.  In this function we assume that the inside costs
2301    calculated in vect_model_xxx_cost are linear in ncopies.  */
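/* E.g., if the final VF is 8 and an instance's unrolling factor is 2, every
   cost recorded for its body below is counted 8 / 2 == 4 times.  */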
2302
2303 void
2304 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2305 {
2306   unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2307   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2308   slp_instance instance;
2309   stmt_vector_for_cost body_cost_vec;
2310   stmt_info_for_cost *si;
2311   void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2312
2313   if (dump_enabled_p ())
2314     dump_printf_loc (MSG_NOTE, vect_location,
2315                      "=== vect_update_slp_costs_according_to_vf ===");
2316
2317   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2318     {
2319       /* We assume that costs are linear in ncopies.  */
2320       int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2321
2322       /* Record the instance's instructions in the target cost model.
2323          This was delayed until here because the count of instructions
2324          isn't known beforehand.  */
2325       body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
2326
2327       FOR_EACH_VEC_ELT (body_cost_vec, j, si)
2328         (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2329                               vinfo_for_stmt (si->stmt), si->misalign,
2330                               vect_body);
2331     }
2332 }
2333
2334
2335 /* For constant and loop invariant defs of SLP_NODE this function returns
2336    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2337    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2338    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2339    REDUC_INDEX is the index of the reduction operand in the statements, unless
2340    it is -1.  */
2341
2342 static void
2343 vect_get_constant_vectors (tree op, slp_tree slp_node,
2344                            vec<tree> *vec_oprnds,
2345                            unsigned int op_num, unsigned int number_of_vectors,
2346                            int reduc_index)
2347 {
2348   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2349   gimple stmt = stmts[0];
2350   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2351   unsigned nunits;
2352   tree vec_cst;
2353   tree *elts;
2354   unsigned j, number_of_places_left_in_vector;
2355   tree vector_type;
2356   tree vop;
2357   int group_size = stmts.length ();
2358   unsigned int vec_num, i;
2359   unsigned number_of_copies = 1;
2360   vec<tree> voprnds;
2361   voprnds.create (number_of_vectors);
2362   bool constant_p, is_store;
2363   tree neutral_op = NULL;
2364   enum tree_code code = gimple_expr_code (stmt);
2365   gimple def_stmt;
2366   struct loop *loop;
2367   gimple_seq ctor_seq = NULL;
2368
2369   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2370       && reduc_index != -1)
2371     {
2372       op_num = reduc_index - 1;
2373       op = gimple_op (stmt, reduc_index);
2374       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2375          we need either neutral operands or the original operands.  See
2376          get_initial_def_for_reduction() for details.  */
2377       switch (code)
2378         {
2379           case WIDEN_SUM_EXPR:
2380           case DOT_PROD_EXPR:
2381           case PLUS_EXPR:
2382           case MINUS_EXPR:
2383           case BIT_IOR_EXPR:
2384           case BIT_XOR_EXPR:
2385              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2386                neutral_op = build_real (TREE_TYPE (op), dconst0);
2387              else
2388                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2389
2390              break;
2391
2392           case MULT_EXPR:
2393              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2394                neutral_op = build_real (TREE_TYPE (op), dconst1);
2395              else
2396                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2397
2398              break;
2399
2400           case BIT_AND_EXPR:
2401             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2402             break;
2403
2404           case MAX_EXPR:
2405           case MIN_EXPR:
2406             def_stmt = SSA_NAME_DEF_STMT (op);
2407             loop = (gimple_bb (stmt))->loop_father;
2408             neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2409                                                 loop_preheader_edge (loop));
2410             break;
2411
2412           default:
2413             neutral_op = NULL;
2414         }
2415     }
2416
2417   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2418     {
2419       is_store = true;
2420       op = gimple_assign_rhs1 (stmt);
2421     }
2422   else
2423     is_store = false;
2424
2425   gcc_assert (op);
2426
2427   if (CONSTANT_CLASS_P (op))
2428     constant_p = true;
2429   else
2430     constant_p = false;
2431
2432   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2433   gcc_assert (vector_type);
2434   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2435
2436   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2437      created vectors. It is greater than 1 if unrolling is performed.
2438
2439      For example, we have two scalar operands, s1 and s2 (e.g., group of
2440      strided accesses of size two), while NUNITS is four (i.e., four scalars
2441      of this type can be packed in a vector).  The output vector will contain
2442      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2443      will be 2).
2444
2445      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2446      containing the operands.
2447
2448      For example, NUNITS is four as before, and the group size is 8
2449      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2450      {s5, s6, s7, s8}.  */
2451
2452   number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2453
2454   number_of_places_left_in_vector = nunits;
2455   elts = XALLOCAVEC (tree, nunits);
2456   for (j = 0; j < number_of_copies; j++)
2457     {
2458       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2459         {
2460           if (is_store)
2461             op = gimple_assign_rhs1 (stmt);
2462           else
2463             {
2464               switch (code)
2465                 {
2466                   case COND_EXPR:
2467                     if (op_num == 0 || op_num == 1)
2468                       {
2469                         tree cond = gimple_assign_rhs1 (stmt);
2470                         op = TREE_OPERAND (cond, op_num);
2471                       }
2472                     else
2473                       {
2474                         if (op_num == 2)
2475                           op = gimple_assign_rhs2 (stmt);
2476                         else
2477                           op = gimple_assign_rhs3 (stmt);
2478                       }
2479                     break;
2480
2481                   case CALL_EXPR:
2482                     op = gimple_call_arg (stmt, op_num);
2483                     break;
2484
2485                   case LSHIFT_EXPR:
2486                   case RSHIFT_EXPR:
2487                   case LROTATE_EXPR:
2488                   case RROTATE_EXPR:
2489                     op = gimple_op (stmt, op_num + 1);
2490                     /* Unlike the other binary operators, shifts/rotates take
2491                        a shift count of type int rather than of the same type
2492                        as the lhs, so make sure the scalar has the right type
2493                        when we are dealing with vectors of
2494                        long long/long/short/char.  */
2495                     if (op_num == 1 && constant_p)
2496                       op = fold_convert (TREE_TYPE (vector_type), op);
2497                     break;
2498
2499                   default:
2500                     op = gimple_op (stmt, op_num + 1);
2501                     break;
2502                 }
2503             }
2504
2505           if (reduc_index != -1)
2506             {
2507               loop = (gimple_bb (stmt))->loop_father;
2508               def_stmt = SSA_NAME_DEF_STMT (op);
2509
2510               gcc_assert (loop);
2511
2512               /* Get the def before the loop.  In a reduction chain we have
2513                  only one initial value.  */
2514               if ((j != (number_of_copies - 1)
2515                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2516                        && i != 0))
2517                   && neutral_op)
2518                 op = neutral_op;
2519               else
2520                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2521                                             loop_preheader_edge (loop));
2522             }
2523
2524           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2525           number_of_places_left_in_vector--;
2526           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2527             {
2528               if (constant_p)
2529                 {
2530                   op = fold_unary (VIEW_CONVERT_EXPR,
2531                                    TREE_TYPE (vector_type), op);
2532                   gcc_assert (op && CONSTANT_CLASS_P (op));
2533                 }
2534               else
2535                 {
2536                   tree new_temp
2537                     = make_ssa_name (TREE_TYPE (vector_type), NULL);
2538                   gimple init_stmt;
2539                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2540                                op);               
2541                   init_stmt
2542                     = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2543                                                     new_temp, op, NULL_TREE);
2544                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2545                   op = new_temp;
2546                 }
2547             }
2548           elts[number_of_places_left_in_vector] = op;
2549
2550           if (number_of_places_left_in_vector == 0)
2551             {
2552               number_of_places_left_in_vector = nunits;
2553
2554               if (constant_p)
2555                 vec_cst = build_vector (vector_type, elts);
2556               else
2557                 {
2558                   vec<constructor_elt, va_gc> *v;
2559                   unsigned k;
2560                   vec_alloc (v, nunits);
2561                   for (k = 0; k < nunits; ++k)
2562                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2563                   vec_cst = build_constructor (vector_type, v);
2564                 }
2565               voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2566                                                     vector_type, NULL));
2567               if (ctor_seq != NULL)
2568                 {
2569                   gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
2570                   gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2571                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2572                                                         GSI_SAME_STMT);
2573                   ctor_seq = NULL;
2574                 }
2575             }
2576         }
2577     }
2578
2579   /* Since the vectors are created in the reverse order, we should invert
2580      them.  */
2581   vec_num = voprnds.length ();
2582   for (j = vec_num; j != 0; j--)
2583     {
2584       vop = voprnds[j - 1];
2585       vec_oprnds->quick_push (vop);
2586     }
2587
2588   voprnds.release ();
2589
2590   /* If VF is greater than the unrolling factor needed for the SLP group of
2591      stmts, the NUMBER_OF_VECTORS to be created is greater than
2592      NUMBER_OF_SCALARS / NUNITS (or NUNITS / NUMBER_OF_SCALARS), and hence we
2593      have to replicate the vectors.  */
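  /* E.g., if two vectors {s1,s2,s3,s4} {s5,s6,s7,s8} were created but
     NUMBER_OF_VECTORS is 4, the two are pushed again below (or, for a
     reduction with a neutral operand, padded with neutral vectors).  */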
2594   while (number_of_vectors > vec_oprnds->length ())
2595     {
2596       tree neutral_vec = NULL;
2597
2598       if (neutral_op)
2599         {
2600           if (!neutral_vec)
2601             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2602
2603           vec_oprnds->quick_push (neutral_vec);
2604         }
2605       else
2606         {
2607           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2608             vec_oprnds->quick_push (vop);
2609         }
2610     }
2611 }
2612
2613
2614 /* Get vectorized definitions from SLP_NODE that contains corresponding
2615    vectorized def-stmts.  */
2616
2617 static void
2618 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2619 {
2620   tree vec_oprnd;
2621   gimple vec_def_stmt;
2622   unsigned int i;
2623
2624   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2625
2626   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2627     {
2628       gcc_assert (vec_def_stmt);
2629       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2630       vec_oprnds->quick_push (vec_oprnd);
2631     }
2632 }
2633
2634
2635 /* Get vectorized definitions for SLP_NODE.
2636    If the scalar definitions are loop invariants or constants, collect them and
2637    call vect_get_constant_vectors() to create vector stmts.
2638    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2639    must be stored in the corresponding child of SLP_NODE, and we call
2640    vect_get_slp_vect_defs () to retrieve them.  */
2641
2642 void
2643 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2644                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2645 {
2646   gimple first_stmt;
2647   int number_of_vects = 0, i;
2648   unsigned int child_index = 0;
2649   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2650   slp_tree child = NULL;
2651   vec<tree> vec_defs;
2652   tree oprnd;
2653   bool vectorized_defs;
2654
2655   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2656   FOR_EACH_VEC_ELT (ops, i, oprnd)
2657     {
2658       /* For each operand we check if it has vectorized definitions in a child
2659          node or we need to create them (for invariants and constants).  We
2660          check if the LHS of the first stmt of the next child matches OPRND.
2661          If it does, we found the correct child.  Otherwise, we call
2662          vect_get_constant_vectors () and do not advance CHILD_INDEX, so
2663          that this child node is checked again for the next operand.  */
2664       vectorized_defs = false;
2665       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2666         {
2667           child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
2668
2669           /* We have to check both pattern and original def, if available.  */
2670           gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2671           gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2672
2673           if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2674               || (related
2675                   && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2676             {
2677               /* The number of vector defs is determined by the number of
2678                  vector statements in the node from which we get those
2679                  statements.  */
2680               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2681               vectorized_defs = true;
2682               child_index++;
2683             }
2684         }
2685
2686       if (!vectorized_defs)
2687         {
2688           if (i == 0)
2689             {
2690               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2691               /* Number of vector stmts was calculated according to LHS in
2692                  vect_schedule_slp_instance (), fix it by replacing LHS with
2693                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2694                  details.  */
2695               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2696                                              &rhs_size_unit);
2697               if (rhs_size_unit != lhs_size_unit)
2698                 {
2699                   number_of_vects *= rhs_size_unit;
2700                   number_of_vects /= lhs_size_unit;
2701                 }
2702             }
2703         }
2704
2705       /* Allocate memory for vectorized defs.  */
2706       vec_defs = vNULL;
2707       vec_defs.create (number_of_vects);
2708
2709       /* For reduction defs we call vect_get_constant_vectors (), since we are
2710          looking for initial loop invariant values.  */
2711       if (vectorized_defs && reduc_index == -1)
2712         /* The defs are already vectorized.  */
2713         vect_get_slp_vect_defs (child, &vec_defs);
2714       else
2715         /* Build vectors from scalar defs.  */
2716         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2717                                    number_of_vects, reduc_index);
2718
2719       vec_oprnds->quick_push (vec_defs);
2720
2721       /* For reductions, we only need initial values.  */
2722       if (reduc_index != -1)
2723         return;
2724     }
2725 }
2726
2727
2728 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2729    building a vector of type MASK_TYPE from it) and two input vectors placed in
2730    DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2731    shifting by STRIDE elements of DR_CHAIN for every copy.
2732    (STRIDE is the number of vectorized stmts for NODE divided by the number of
2733    copies).
2734    VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2735    the created stmts must be inserted.  */
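/* For example, with SLP_TREE_NUMBER_OF_VEC_STMTS == 4 and NCOPIES == 2,
   STRIDE is 2: copy 0 reads DR_CHAIN slots FIRST_VEC_INDX and
   SECOND_VEC_INDX, and copy 1 reads the same slots shifted by 2.  */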
2736
2737 static inline void
2738 vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
2739                            tree mask, int first_vec_indx, int second_vec_indx,
2740                            gimple_stmt_iterator *gsi, slp_tree node,
2741                            tree vectype, vec<tree> dr_chain,
2742                            int ncopies, int vect_stmts_counter)
2743 {
2744   tree perm_dest;
2745   gimple perm_stmt = NULL;
2746   stmt_vec_info next_stmt_info;
2747   int i, stride;
2748   tree first_vec, second_vec, data_ref;
2749
2750   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
2751
2752   /* Initialize the vect stmts of NODE to properly insert the generated
2753      stmts later.  */
2754   for (i = SLP_TREE_VEC_STMTS (node).length ();
2755        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2756     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
2757
2758   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2759   for (i = 0; i < ncopies; i++)
2760     {
2761       first_vec = dr_chain[first_vec_indx];
2762       second_vec = dr_chain[second_vec_indx];
2763
2764       /* Generate the permute statement.  */
2765       perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2766                                                 first_vec, second_vec, mask);
2767       data_ref = make_ssa_name (perm_dest, perm_stmt);
2768       gimple_set_lhs (perm_stmt, data_ref);
2769       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2770
2771       /* Store the vector statement in NODE.  */
2772       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
2773
2774       first_vec_indx += stride;
2775       second_vec_indx += stride;
2776     }
2777
2778   /* Mark the scalar stmt as vectorized.  */
2779   next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2780   STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2781 }
2782
2783
2784 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
2785    return in CURRENT_MASK_ELEMENT its equivalent in target specific
2786    representation.  Check that the mask is valid and return FALSE if not.
2787    Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
2788    the next vector, i.e., the current first vector is not needed.  */
2789
2790 static bool
2791 vect_get_mask_element (gimple stmt, int first_mask_element, int m,
2792                        int mask_nunits, bool only_one_vec, int index,
2793                        unsigned char *mask, int *current_mask_element,
2794                        bool *need_next_vector, int *number_of_mask_fixes,
2795                        bool *mask_fixed, bool *needs_first_vector)
2796 {
2797   int i;
2798
2799   /* Convert to target specific representation.  */
2800   *current_mask_element = first_mask_element + m;
2801   /* Adjust the value in case it's a mask for second and third vectors.  */
2802   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
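  /* E.g., with MASK_NUNITS == 4, once a mask has been fixed
     (*NUMBER_OF_MASK_FIXES == 2) a subsequent element value of 9 is
     adjusted here to 9 - 4 == 5, as in the {6,9,9,9} -> {2,5,5,5}
     example in vect_transform_slp_perm_load.  */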
2803
2804   if (*current_mask_element < mask_nunits)
2805     *needs_first_vector = true;
2806
2807   /* We have only one input vector to permute but the mask accesses values in
2808      the next vector as well.  */
2809   if (only_one_vec && *current_mask_element >= mask_nunits)
2810     {
2811       if (dump_enabled_p ())
2812         {
2813           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2814                            "permutation requires at least two vectors ");
2815           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2816         }
2817
2818       return false;
2819     }
2820
2821   /* The mask requires the next vector.  */
2822   if (*current_mask_element >= mask_nunits * 2)
2823     {
2824       if (*needs_first_vector || *mask_fixed)
2825         {
2826           /* We either need the first vector too or have already moved to the
2827              next vector. In both cases, this permutation needs three
2828              vectors.  */
2829           if (dump_enabled_p ())
2830             {
2831               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2832                                "permutation requires at "
2833                                "least three vectors ");
2834               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2835             }
2836
2837           return false;
2838         }
2839
2840       /* We move to the next vector, dropping the first one and working with
2841          the second and the third - we need to adjust the values of the mask
2842          accordingly.  */
2843       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2844
2845       for (i = 0; i < index; i++)
2846         mask[i] -= mask_nunits * *number_of_mask_fixes;
2847
2848       (*number_of_mask_fixes)++;
2849       *mask_fixed = true;
2850     }
2851
2852   *need_next_vector = *mask_fixed;
2853
2854   /* This was the last element of this mask. Start a new one.  */
2855   if (index == mask_nunits - 1)
2856     {
2857       *number_of_mask_fixes = 1;
2858       *mask_fixed = false;
2859       *needs_first_vector = false;
2860     }
2861
2862   return true;
2863 }
2864
2865
2866 /* Generate vector permute statements from a list of loads in DR_CHAIN.
2867    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2868    permute statements for SLP_NODE_INSTANCE.  */
2869 bool
2870 vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
2871                               gimple_stmt_iterator *gsi, int vf,
2872                               slp_instance slp_node_instance, bool analyze_only)
2873 {
2874   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2875   tree mask_element_type = NULL_TREE, mask_type;
2876   int i, j, k, nunits, vec_index = 0, scalar_index;
2877   slp_tree node;
2878   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2879   gimple next_scalar_stmt;
2880   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2881   int first_mask_element;
2882   int index, unroll_factor, current_mask_element, ncopies;
2883   unsigned char *mask;
2884   bool only_one_vec = false, need_next_vector = false;
2885   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2886   int number_of_mask_fixes = 1;
2887   bool mask_fixed = false;
2888   bool needs_first_vector = false;
2889   enum machine_mode mode;
2890
2891   mode = TYPE_MODE (vectype);
2892
2893   if (!can_vec_perm_p (mode, false, NULL))
2894     {
2895       if (dump_enabled_p ())
2896         {
2897           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2898                            "no vect permute for ");
2899           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2900         }
2901       return false;
2902     }
2903
2904   /* The generic VEC_PERM_EXPR code always uses an integral type of the
2905      same size as the vector element being permuted.  */
2906   mask_element_type = lang_hooks.types.type_for_mode
2907                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
2908   mask_type = get_vectype_for_scalar_type (mask_element_type);
2909   nunits = TYPE_VECTOR_SUBPARTS (vectype);
2910   mask = XALLOCAVEC (unsigned char, nunits);
2911   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2912
2913   /* The number of vector stmts to generate based only on the
2914      SLP_NODE_INSTANCE unrolling factor.  */
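  /* E.g., in the GROUP_SIZE == 3, NUNITS == 4 example further down
     (unrolling factor 4), this is 3 * 4 / 4 == 3 vectors per load node.  */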
2915   orig_vec_stmts_num = group_size *
2916                 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2917   if (orig_vec_stmts_num == 1)
2918     only_one_vec = true;
2919
2920   /* The number of copies is determined by the final vectorization factor
2921      relative to the SLP_NODE_INSTANCE unrolling factor.  */
2922   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2923
2924   /* Generate permutation masks for every NODE. Number of masks for each NODE
2925      is equal to GROUP_SIZE.
2926      E.g., we have a group of three nodes with three loads from the same
2927      location in each node, and the vector size is 4.  I.e., we have an
2928      a0b0c0a1b1c1... sequence and we need to create the following vectors:
2929      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2930      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2931      ...
2932
2933      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
2934      The last mask is illegal since we assume two operands for permute
2935      operation, and the mask element values can't be outside that range.
2936      Hence, the last mask must be converted into {2,5,5,5}.
2937      For the first two permutations we need the first and the second input
2938      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
2939      we need the second and the third vectors: {b1,c1,a2,b2} and
2940      {c2,a3,b3,c3}.  */
2941
2942   FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2943     {
2944       scalar_index = 0;
2945       index = 0;
2946       vect_stmts_counter = 0;
2947       vec_index = 0;
2948       first_vec_index = vec_index++;
2949       if (only_one_vec)
2950         second_vec_index = first_vec_index;
2951       else
2952         second_vec_index = vec_index++;
2953
2954       for (j = 0; j < unroll_factor; j++)
2955         {
2956           for (k = 0; k < group_size; k++)
2957             {
2958               first_mask_element = i + j * group_size;
2959               if (!vect_get_mask_element (stmt, first_mask_element, 0,
2960                                           nunits, only_one_vec, index,
2961                                           mask, &current_mask_element,
2962                                           &need_next_vector,
2963                                           &number_of_mask_fixes, &mask_fixed,
2964                                           &needs_first_vector))
2965                 return false;
2966               mask[index++] = current_mask_element;
2967
2968               if (index == nunits)
2969                 {
2970                   tree mask_vec, *mask_elts;
2971                   int l;
2972
2973                   if (!can_vec_perm_p (mode, false, mask))
2974                     {
2975                       if (dump_enabled_p ())
2976                         {
2977                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2978                                            vect_location,
2979                                            "unsupported vect permute { ");
2980                           for (i = 0; i < nunits; ++i)
2981                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2982                                          mask[i]);
2983                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2984                         }
2985                       return false;
2986                     }
2987
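                       /* Materialize the mask as a constant vector of
                          the integral mask element type computed
                          above.  */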
2988                   mask_elts = XALLOCAVEC (tree, nunits);
2989                   for (l = 0; l < nunits; ++l)
2990                     mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2991                   mask_vec = build_vector (mask_type, mask_elts);
2992                   index = 0;
2993
2994                   if (!analyze_only)
2995                     {
2996                       if (need_next_vector)
2997                         {
2998                           first_vec_index = second_vec_index;
2999                           second_vec_index = vec_index;
3000                         }
3001
3002                       next_scalar_stmt
3003                           = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
3004
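                           /* Emit the VEC_PERM_EXPR stmts (NCOPIES
                              copies of this permutation) producing the
                              next vectorized result.  */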
3005                       vect_create_mask_and_perm (stmt, next_scalar_stmt,
3006                                mask_vec, first_vec_index, second_vec_index,
3007                                gsi, node, vectype, dr_chain,
3008                                ncopies, vect_stmts_counter++);
3009                     }
3010                 }
3011             }
3012         }
3013     }
3014
3015   return true;
3016 }
3017
3018
3019
3020 /* Vectorize SLP instance tree in postorder.  */
3021
3022 static bool
3023 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3024                             unsigned int vectorization_factor)
3025 {
3026   gimple stmt;
3027   bool grouped_store, is_store;
3028   gimple_stmt_iterator si;
3029   stmt_vec_info stmt_info;
3030   unsigned int vec_stmts_size, nunits, group_size;
3031   tree vectype;
3032   int i;
3033   slp_tree loads_node;
3034   slp_tree child;
3035
3036   if (!node)
3037     return false;
3038
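       /* Vectorize the children of NODE first (postorder).  */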
3039   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3040     vect_schedule_slp_instance (child, instance, vectorization_factor);
3041
3042   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3043   stmt_info = vinfo_for_stmt (stmt);
3044
3045   /* VECTYPE is the type of the destination.  */
3046   vectype = STMT_VINFO_VECTYPE (stmt_info);
3047   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3048   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3049
3050   /* For each SLP instance calculate the number of vector stmts to be created
3051      for the scalar stmts in each node of the SLP tree.  The number of vector
3052      elements in one vector iteration is the number of scalar elements in
3053      one scalar iteration (GROUP_SIZE) multiplied by VF, divided by the
3054      vector size.  */
3055   vec_stmts_size = (vectorization_factor * group_size) / nunits;
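       /* E.g., VF == 4, GROUP_SIZE == 2 and NUNITS == 4 yield
          (4 * 2) / 4 == 2 vector stmts for each node.  */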
3056
3057   /* In case of load permutation we have to allocate vectorized statements for
3058      all the nodes that participate in that permutation.  */
3059   if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3060     {
3061       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
3062         {
3063           if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
3064             {
3065               SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
3066               SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3067             }
3068         }
3069     }
3070
3071   if (!SLP_TREE_VEC_STMTS (node).exists ())
3072     {
3073       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3074       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3075     }
3076
3077   if (dump_enabled_p ())
3078     {
3079       dump_printf_loc (MSG_NOTE, vect_location,
3080                        "------>vectorizing SLP node starting from: ");
3081       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3082     }
3083
3084   /* Loads should be inserted before the first load.  */
3085   if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
3086       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
3087       && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
3088       && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
3089     si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
3090   else if (is_pattern_stmt_p (stmt_info))
3091     si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3092   else
3093     si = gsi_for_stmt (stmt);
3094
3095   /* Stores should be inserted just before the last store.  */
3096   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
3097       && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3098     {
3099       gimple last_store = vect_find_last_store_in_slp_instance (instance);
3100       if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3101         last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
3102       si = gsi_for_stmt (last_store);
3103     }
3104
3105   /* Mark the first element of the reduction chain as a reduction to properly
3106      transform the node.  In the analysis phase only the last element of the
3107      chain is marked as a reduction.  */
3108   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3109       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3110     {
3111       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3112       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3113     }
3114
3115   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3116   return is_store;
3117 }
3118
3119 /* Replace the scalar calls in SLP node NODE with assignments of zero to
3120    their lhs.  For loop vectorization this is done in vectorizable_call, but
3121    for SLP it needs to be deferred until the end of vect_schedule_slp,
3122    because multiple SLP instances may refer to the same scalar stmt.  */
3123
3124 static void
3125 vect_remove_slp_scalar_calls (slp_tree node)
3126 {
3127   gimple stmt, new_stmt;
3128   gimple_stmt_iterator gsi;
3129   int i;
3130   slp_tree child;
3131   tree lhs;
3132   stmt_vec_info stmt_info;
3133
3134   if (!node)
3135     return;
3136
3137   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3138     vect_remove_slp_scalar_calls (child);
3139
3140   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3141     {
3142       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3143         continue;
3144       stmt_info = vinfo_for_stmt (stmt);
3145       if (stmt_info == NULL
3146           || is_pattern_stmt_p (stmt_info)
3147           || !PURE_SLP_STMT (stmt_info))
3148         continue;
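           /* Replace the call with a dummy assignment of zero to its lhs,
              e.g. (illustrative)  x_1 = foo (a_2);  becomes  x_1 = 0;
              and move the stmt_vec_info over to the new stmt.  */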
3149       lhs = gimple_call_lhs (stmt);
3150       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3151       set_vinfo_for_stmt (new_stmt, stmt_info);
3152       set_vinfo_for_stmt (stmt, NULL);
3153       STMT_VINFO_STMT (stmt_info) = new_stmt;
3154       gsi = gsi_for_stmt (stmt);
3155       gsi_replace (&gsi, new_stmt, false);
3156       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3157     }
3158 }
3159
3160 /* Generate vector code for all SLP instances in the loop/basic block.  */
3161
3162 bool
3163 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3164 {
3165   vec<slp_instance> slp_instances;
3166   slp_instance instance;
3167   slp_tree loads_node;
3168   unsigned int i, j, vf;
3169   bool is_store = false;
3170
3171   if (loop_vinfo)
3172     {
3173       slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3174       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3175     }
3176   else
3177     {
3178       slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3179       vf = 1;
3180     }
3181
3182   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3183     {
3184       /* Schedule the tree of INSTANCE.  */
3185       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3186                                              instance, vf);
3187
3188       /* Clear STMT_VINFO_VEC_STMT of all loads.  With loads shared
3189          between SLP instances we would otherwise fail to initialize the
3190          vectorized SLP stmts properly and confuse load permutations.  */
3191       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
3192         STMT_VINFO_VEC_STMT
3193           (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;
3194
3195       if (dump_enabled_p ())
3196         dump_printf_loc (MSG_NOTE, vect_location,
3197                          "vectorizing stmts using SLP.");
3198     }
3199
3200   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3201     {
3202       slp_tree root = SLP_INSTANCE_TREE (instance);
3203       gimple store;
3204       unsigned int j;
3205       gimple_stmt_iterator gsi;
3206
3207       /* Remove scalar call stmts.  Do not do this for basic-block
3208          vectorization as not all uses may be vectorized.
3209          ???  Why should this be necessary?  DCE should be able to
3210          remove the stmts itself.
3211          ???  For BB vectorization we can as well remove scalar
3212          stmts starting from the SLP tree root if they have no
3213          uses.  */
3214       if (loop_vinfo)
3215         vect_remove_slp_scalar_calls (root);
3216
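           /* Remove the scalar stores of this instance; they have been
              replaced by the vectorized stmts scheduled above.  */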
3217       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3218                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3219         {
3220           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3221             break;
3222
3223           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3224             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3225           /* Free the attached stmt_vec_info and remove the stmt.  */
3226           gsi = gsi_for_stmt (store);
3227           unlink_stmt_vdef (store);
3228           gsi_remove (&gsi, true);
3229           release_defs (store);
3230           free_stmt_vec_info (store);
3231         }
3232     }
3233
3234   return is_store;
3235 }
3236
3237
3238 /* Vectorize the basic block.  */
3239
3240 void
3241 vect_slp_transform_bb (basic_block bb)
3242 {
3243   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3244   gimple_stmt_iterator si;
3245
3246   gcc_assert (bb_vinfo);
3247
3248   if (dump_enabled_p ())
3249     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3250
3251   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3252     {
3253       gimple stmt = gsi_stmt (si);
3254       stmt_vec_info stmt_info;
3255
3256       if (dump_enabled_p ())
3257         {
3258           dump_printf_loc (MSG_NOTE, vect_location,
3259                            "------>SLPing statement: ");
3260           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3261         }
3262
3263       stmt_info = vinfo_for_stmt (stmt);
3264       gcc_assert (stmt_info);
3265
3266       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3267       if (STMT_SLP_TYPE (stmt_info))
3268         {
3269           vect_schedule_slp (NULL, bb_vinfo);
3270           break;
3271         }
3272     }
3273
3274   if (dump_enabled_p ())
3275     dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
3276
3277   destroy_bb_vec_info (bb_vinfo);
3278 }