/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"              /* FIXME: for insn_data */
#include "gimple-pretty-print.h"
#include "dumpfile.h"
#include "params.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "flags.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"

/* Extract the location of the basic block in the source code.
   Return the basic block's location if any of its statements has a
   known location, and UNKNOWN_LOCATION otherwise.  */

source_location
find_bb_location (basic_block bb)
{
  gimple *stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOCATION;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
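      /* A COND_EXPR's embedded comparison is a single gimple operand
         but provides two SLP operands (the two comparison operands),
         hence the extra slot.  */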
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
  tree first_op_type;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

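  /* Walk the chain; RESULT accumulates the GROUP_GAP of each element
     passed over, yielding STMT's element index within the (possibly
     gappy) chain once we reach it.  */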
  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  Return -1 on a fatal error, 1 if
   the error could be corrected by swapping the operands of the operation,
   and 0 if everything is OK.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_a <loop_vec_info> (vinfo))
    loop = LOOP_VINFO_LOOP (as_a <loop_vec_info> (vinfo));

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
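      /* For calls the actual arguments start at gimple operand index 3:
         index 0 is the LHS, 1 the called function and 2 the static
         chain.  */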
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        {
          first_op_cond = true;
          commutative = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

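  /* If a mismatch is found on the first operand of a commutative
     operation we retry with the two operands exchanged; SWAPPED records
     this so the GIMPLE operands themselves can be swapped once the
     whole group has been checked (see the end of this function).  */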
  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          if (i == 0 || i == 1)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
                                  swapped ? !i : i);
          else
            oprnd = gimple_op (stmt, first_op_idx + i - 1);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((is_a <loop_vec_info> (vinfo)
               && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (is_a <bb_vec_info> (vinfo)
                  && gimple_bb (def_stmt) == as_a <bb_vec_info> (vinfo)->bb
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
    }

  return 0;
}


/* Check whether the scalar stmts STMTS are isomorphic to each other,
   of a supported type of operation, and free of data permutation
   requirements.  Return true if so; otherwise return false and indicate
   in *MATCHES which stmts are not isomorphic to the first one.  If
   MATCHES[0] is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       unsigned int vectorization_factor, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: condition is not "
                               "comparison ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (is_a <bb_vec_info> (vinfo))
            vectorization_factor = *max_nunits;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              unsigned ncopies
                = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (is_a <loop_vec_info> (vinfo)
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
                       - GROUP_GAP (vinfo_for_stmt (stmt)))
                      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: the number "
                                       "of interleaved loads is greater than "
                                       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Fatal mismatch.  */
                  matches[0] = false;
                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
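      /* Lane I of SEL selects element I of the first input vector
         (indices < nunits) unless stmt I uses ALT_STMT_CODE, in which
         case it selects element I of the second vector (indices offset
         by nunits): a blend of the two operations' results.  */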
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return false) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise
   return true; *MATCHES records which stmts of the group matched the
   first one, with a false MATCHES[0] signalling a fatal failure.  */

static bool
vect_build_slp_tree (vec_info *vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  gimple *stmt;

  matches[0] = false;

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  bool two_operators = false;
  if (!vect_build_slp_tree_1 (vinfo,
                              SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
                              max_nunits, vectorization_factor, matches,
                              &two_operators))
    return false;
  SLP_TREE_TWO_OPERATORS (*node) = two_operators;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info))
        {
        case 0:
          break;
        case -1:
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return false;
        case 1:
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return false;
      }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

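      /* Bound the size of the SLP tree we are willing to build; beyond
         MAX_TREE_SIZE nodes we give up to keep analysis time in check.  */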
      if (++this_tree_size > max_tree_size)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
        {
          vect_free_oprnd_info (oprnds_info);
          return false;
        }

      if (vect_build_slp_tree (vinfo, &child,
                               group_size, max_nunits, loads,
                               vectorization_factor, matches,
                               npermutes, &this_tree_size, max_tree_size))
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ())
            {
              unsigned int j;
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (grandchild != NULL)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  *max_nunits = old_max_nunits;
                  loads->truncate (old_nloads);
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  vect_free_slp_tree (child);
                  SLP_TREE_CHILDREN (*node).quick_push (NULL);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          SLP_TREE_CHILDREN (*node).quick_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ???  This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ???  Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          oprnd_info->def_stmts = vNULL;
          vect_free_slp_tree (child);
          SLP_TREE_CHILDREN (*node).quick_push (NULL);
          continue;
        }

      /* If the SLP build for operand zero failed and operands zero
         and one can be commuted, try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && !SLP_TREE_TWO_OPERATORS (*node)
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          unsigned int j;
          slp_tree grandchild;

          /* Roll back.  */
          *max_nunits = old_max_nunits;
          loads->truncate (old_nloads);
          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
            vect_free_slp_tree (grandchild);
          SLP_TREE_CHILDREN (child).truncate (0);

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ... */
          bool *tem = XALLOCAVEC (bool, group_size);
          if (vect_build_slp_tree (vinfo, &child,
                                   group_size, max_nunits, loads,
                                   vectorization_factor,
                                   tem, npermutes, &this_tree_size,
                                   max_tree_size))
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  */
              for (j = 0; j < group_size; ++j)
                if (!matches[j])
                  {
                    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
                    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                       gimple_assign_rhs2_ptr (stmt));
                  }
              oprnd_info->def_stmts = vNULL;
              SLP_TREE_CHILDREN (*node).quick_push (child);
              continue;
            }

          ++*npermutes;
        }

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  if (tree_size)
    *tree_size += this_tree_size;

  vect_free_oprnd_info (oprnds_info);
  return true;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (!node)
    return;

  dump_printf (dump_kind, "node ");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf (dump_kind, "\n\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  dump_printf (dump_kind, "\n");

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

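  /* Apply PERMUTATION as a scatter: the stmt currently at index I moves
     to index PERMUTATION[I].  */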
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  sbitmap load_index;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    SLP_TREE_LOAD_PERMUTATION (node).release ();
  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load, *first_load;
  struct data_reference *dr;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ???  Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (vect_attempt_slp_rearrange_stmts (slp_instn))
        return true;

      /* Fallthru to general load permutation handling.  */
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.
         ???  This belongs in alignment checking.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          first_load = SLP_TREE_SCALAR_STMTS (node)[0];
          if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
            {
              dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
              if (vect_supportable_dr_alignment (dr, false)
                  == dr_unaligned_unsupported)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported unaligned load ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        first_load, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
            }
        }

      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;

  return true;
}


/* Find the last scalar stmt (in program order) of the SLP tree NODE.  */
1462
1463 static gimple *
1464 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1465 {
1466   gimple *last = NULL, *stmt;
1467
1468   for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1469     {
1470       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1471       if (is_pattern_stmt_p (stmt_vinfo))
1472         last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1473       else
1474         last = get_later_stmt (stmt, last);
1475     }
1476
1477   return last;
1478 }
1479
1480 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
1481
1482 static void
1483 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1484                          stmt_vector_for_cost *prologue_cost_vec,
1485                          stmt_vector_for_cost *body_cost_vec,
1486                          unsigned ncopies_for_cost)
1487 {
1488   unsigned i;
1489   slp_tree child;
1490   gimple *stmt, *s;
1491   stmt_vec_info stmt_info;
1492   tree lhs;
1493   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1494
1495   /* Recurse down the SLP tree.  */
1496   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1497     if (child)
1498       vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1499                                body_cost_vec, ncopies_for_cost);
1500
1501   /* Look at the first scalar stmt to determine the cost.  */
1502   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1503   stmt_info = vinfo_for_stmt (stmt);
1504   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1505     {
1506       if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1507         vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1508                                vect_uninitialized_def,
1509                                node, prologue_cost_vec, body_cost_vec);
1510       else
1511         {
1512           int i;
1513           gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1514           vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1515                                 node, prologue_cost_vec, body_cost_vec);
1516           /* If the load is permuted record the cost for the permutation.
1517              ???  Loads from multiple chains are let through here only
1518              for a single special case involving complex numbers where
1519              in the end no permutation is necessary.  */
1520           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1521             if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1522                  == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1523                 && vect_get_place_in_interleaving_chain
1524                      (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1525               {
1526                 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1527                                   stmt_info, 0, vect_body);
1528                 break;
1529               }
1530         }
1531     }
1532   else
1533     {
1534       record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1535                         stmt_info, 0, vect_body);
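           /* A two-operators node mixes two operation codes for alternating
              lanes (e.g., hypothetically, { +, -, +, - } as produced by an
              addsub-style pattern); both vector stmts are generated and then
              blended with a permute, hence the two extra costs below.  */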
1536       if (SLP_TREE_TWO_OPERATORS (node))
1537         {
1538           record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1539                             stmt_info, 0, vect_body);
1540           record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1541                             stmt_info, 0, vect_body);
1542         }
1543     }
1544
1545   /* Scan operands and account for prologue cost of constants/externals.
1546      ???  This over-estimates cost for multiple uses and should be
1547      re-engineered.  */
1548   lhs = gimple_get_lhs (stmt);
1549   for (i = 0; i < gimple_num_ops (stmt); ++i)
1550     {
1551       tree op = gimple_op (stmt, i);
1552       gimple *def_stmt;
1553       enum vect_def_type dt;
1554       if (!op || op == lhs)
1555         continue;
1556       if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
1557         {
1558           /* Without looking at the actual initializer a vector of
1559              constants can be implemented as a load from the constant pool.
1560              ???  We need to pass down stmt_info for a vector type
1561              even if it points to the wrong stmt.  */
1562           if (dt == vect_constant_def)
1563             record_stmt_cost (prologue_cost_vec, 1, vector_load,
1564                               stmt_info, 0, vect_prologue);
1565           else if (dt == vect_external_def)
1566             record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1567                               stmt_info, 0, vect_prologue);
1568         }
1569     }
1570 }
1571
1572 /* Compute the cost for the SLP instance INSTANCE.  */
1573
1574 static void
1575 vect_analyze_slp_cost (slp_instance instance, void *data)
1576 {
1577   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1578   unsigned ncopies_for_cost;
1579   stmt_info_for_cost *si;
1580   unsigned i;
1581
1582   if (dump_enabled_p ())
1583     dump_printf_loc (MSG_NOTE, vect_location,
1584                      "=== vect_analyze_slp_cost ===\n");
1585
1586   /* Calculate the number of vector stmts to create based on the unrolling
1587      factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1588      GROUP_SIZE / NUNITS otherwise).  */
1589   unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1590   slp_tree node = SLP_INSTANCE_TREE (instance);
1591   stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1592   /* Adjust the group_size by the vectorization factor which is always one
1593      for basic-block vectorization.  */
1594   if (STMT_VINFO_LOOP_VINFO (stmt_info))
1595     group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1596   unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1597   /* For reductions look at a reduction operand in case the reduction
1598      operation is widening like DOT_PROD or SAD.  */
1599   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1600     {
1601       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1602       switch (gimple_assign_rhs_code (stmt))
1603         {
1604         case DOT_PROD_EXPR:
1605         case SAD_EXPR:
1606           nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1607                                 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1608           break;
1609         default:;
1610         }
1611     }
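       /* Worked example with made-up numbers: nunits = 4 and an (adjusted)
          group_size = 6 give least_common_multiple (4, 6) / 4 = 3 copies.  */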
1612   ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1613
1614   prologue_cost_vec.create (10);
1615   body_cost_vec.create (10);
1616   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1617                            &prologue_cost_vec, &body_cost_vec,
1618                            ncopies_for_cost);
1619
1620   /* Record the prologue costs, which were delayed until we were
1621      sure that SLP was successful.  */
1622   FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1623     {
1624       struct _stmt_vec_info *stmt_info
1625         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1626       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1627                             si->misalign, vect_prologue);
1628     }
1629
1630   /* Record the instance's instructions in the target cost model.  */
1631   FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1632     {
1633       struct _stmt_vec_info *stmt_info
1634         = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1635       (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1636                             si->misalign, vect_body);
1637     }
1638
1639   prologue_cost_vec.release ();
1640   body_cost_vec.release ();
1641 }
1642
1643 /* Analyze an SLP instance starting from a group of grouped stores, a
1644    reduction chain or a group of reductions.  Call vect_build_slp_tree to
1645    build a tree of packed stmts if possible; return FALSE otherwise.  */
1646
1647 static bool
1648 vect_analyze_slp_instance (vec_info *vinfo,
1649                            gimple *stmt, unsigned max_tree_size)
1650 {
1651   slp_instance new_instance;
1652   slp_tree node;
1653   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1654   unsigned int unrolling_factor = 1, nunits;
1655   tree vectype, scalar_type = NULL_TREE;
1656   gimple *next;
1657   unsigned int vectorization_factor = 0;
1658   int i;
1659   unsigned int max_nunits = 0;
1660   vec<slp_tree> loads;
1661   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1662   vec<gimple *> scalar_stmts;
1663
1664   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1665     {
1666       if (dr)
1667         {
1668           scalar_type = TREE_TYPE (DR_REF (dr));
1669           vectype = get_vectype_for_scalar_type (scalar_type);
1670         }
1671       else
1672         {
1673           gcc_assert (is_a <loop_vec_info> (vinfo));
1674           vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1675         }
1676
1677       group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1678     }
1679   else
1680     {
1681       gcc_assert (is_a <loop_vec_info> (vinfo));
1682       vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1683       group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1684     }
1685
1686   if (!vectype)
1687     {
1688       if (dump_enabled_p ())
1689         {
1690           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1691                            "Build SLP failed: unsupported data-type ");
1692           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1693           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1694         }
1695
1696       return false;
1697     }
1698
1699   nunits = TYPE_VECTOR_SUBPARTS (vectype);
1700   if (is_a <loop_vec_info> (vinfo))
1701     vectorization_factor = as_a <loop_vec_info> (vinfo)->vectorization_factor;
1702   else
1703     vectorization_factor = nunits;
1704
1705   /* Calculate the unrolling factor.  */
1706   unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
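       /* E.g. (illustrative): nunits = 4 and group_size = 2 yield
          least_common_multiple (4, 2) / 2 = 2, i.e. two instances of the
          group are needed to fill a single vector.  */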
1707   if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
1708     {
1709       if (dump_enabled_p ())
1710         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1711                          "Build SLP failed: unrolling required in basic"
1712                          " block SLP\n");
1713
1714       return false;
1715     }
1716
1717   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
1718   scalar_stmts.create (group_size);
1719   next = stmt;
1720   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1721     {
1722       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
1723       while (next)
1724         {
1725           if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1726               && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1727             scalar_stmts.safe_push (
1728                   STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1729           else
1730             scalar_stmts.safe_push (next);
1731           next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1732         }
1733       /* Mark the first element of the reduction chain as reduction to properly
1734          transform the node.  In the reduction analysis phase only the last
1735          element of the chain is marked as reduction.  */
1736       if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1737         STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
1738     }
1739   else
1740     {
1741       /* Collect reduction statements.  */
1742       vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1743       for (i = 0; reductions.iterate (i, &next); i++)
1744         scalar_stmts.safe_push (next);
1745     }
1746
1747   node = vect_create_new_slp_node (scalar_stmts);
1748
1749   loads.create (group_size);
1750
1751   /* Build the tree for the SLP instance.  */
1752   bool *matches = XALLOCAVEC (bool, group_size);
1753   unsigned npermutes = 0;
1754   if (vect_build_slp_tree (vinfo, &node, group_size,
1755                            &max_nunits, &loads,
1756                            vectorization_factor, matches, &npermutes, NULL,
1757                            max_tree_size))
1758     {
1759       /* Calculate the unrolling factor based on the smallest type.  */
1760       if (max_nunits > nunits)
1761         unrolling_factor = least_common_multiple (max_nunits, group_size)
1762                            / group_size;
1763
1764       if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
1765         {
1766           if (dump_enabled_p ())
1767             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1768                              "Build SLP failed: unrolling required in basic"
1769                              " block SLP\n");
1770           vect_free_slp_tree (node);
1771           loads.release ();
1772           return false;
1773         }
1774
1775       /* Create a new SLP instance.  */
1776       new_instance = XNEW (struct _slp_instance);
1777       SLP_INSTANCE_TREE (new_instance) = node;
1778       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1779       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1780       SLP_INSTANCE_LOADS (new_instance) = loads;
1781
1782       /* Compute the load permutation.  */
1783       slp_tree load_node;
1784       bool loads_permuted = false;
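           /* Sketch of the computation below (hypothetical access order):
              if the node's scalar stmts load { a[2], a[0], a[3], a[1] }
              from a chain starting at a[0], the recorded permutation is
              { 2, 0, 3, 1 }.  */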
1785       FOR_EACH_VEC_ELT (loads, i, load_node)
1786         {
1787           vec<unsigned> load_permutation;
1788           int j;
1789           gimple *load, *first_stmt;
1790           bool this_load_permuted = false;
1791           load_permutation.create (group_size);
1792           first_stmt = GROUP_FIRST_ELEMENT
1793               (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1794           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1795             {
1796               int load_place
1797                 = vect_get_place_in_interleaving_chain (load, first_stmt);
1798               gcc_assert (load_place != -1);
1799               if (load_place != j)
1800                 this_load_permuted = true;
1801               load_permutation.safe_push (load_place);
1802             }
1803           if (!this_load_permuted
1804               /* The load requires permutation when unrolling exposes
1805                  a gap either because the group is larger than the SLP
1806                  group-size or because there is a gap between the groups.  */
1807               && (unrolling_factor == 1
1808                   || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1809                       && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
1810             {
1811               load_permutation.release ();
1812               continue;
1813             }
1814           SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1815           loads_permuted = true;
1816         }
1817
1818       if (loads_permuted)
1819         {
1820           if (!vect_supported_load_permutation_p (new_instance))
1821             {
1822               if (dump_enabled_p ())
1823                 {
1824                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1825                                    "Build SLP failed: unsupported load "
1826                                    "permutation ");
1827                   dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
1828                   dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1829                 }
1830               vect_free_slp_instance (new_instance);
1831               return false;
1832             }
1833         }
1834
1835       vinfo->slp_instances.safe_push (new_instance);
1836
1837       if (dump_enabled_p ())
1838         vect_print_slp_tree (MSG_NOTE, node);
1839
1840       return true;
1841     }
1842
1843   /* Failed to SLP.  */
1844   /* Free the allocated memory.  */
1845   vect_free_slp_tree (node);
1846   loads.release ();
1847
1848   return false;
1849 }
1850
1851
1852 /* Check if there are stmts in the loop that can be vectorized using SLP.
1853    Build SLP trees of packed scalar stmts if SLP is possible.  */
1854
1855 bool
1856 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
1857 {
1858   unsigned int i;
1859   gimple *first_element;
1860   bool ok = false;
1861
1862   if (dump_enabled_p ())
1863     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
1864
1865   /* Find SLP sequences starting from groups of grouped stores.  */
1866   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
1867     if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
1868       ok = true;
1869
1870   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
1871     {
1872       if (loop_vinfo->reduction_chains.length () > 0)
1873         {
1874           /* Find SLP sequences starting from reduction chains.  */
1875           FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
1876               if (vect_analyze_slp_instance (vinfo, first_element,
1877                                              max_tree_size))
1878                 ok = true;
1879               else
1880                 return false;
1881
1882           /* Don't try to vectorize SLP reductions if reduction chain was
1883              detected.  */
1884           return ok;
1885         }
1886
1887       /* Find SLP sequences starting from groups of reductions.  */
1888       if (loop_vinfo->reductions.length () > 1
1889           && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
1890                                         max_tree_size))
1891         ok = true;
1892     }
1893
1894   return true;
1895 }
1896
1897
1898 /* For each possible SLP instance decide whether to SLP it and calculate the
1899    overall unrolling factor needed to SLP the loop.  Return TRUE if we decided
1900    to SLP at least one instance.  */
1901
1902 bool
1903 vect_make_slp_decision (loop_vec_info loop_vinfo)
1904 {
1905   unsigned int i, unrolling_factor = 1;
1906   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1907   slp_instance instance;
1908   int decided_to_slp = 0;
1909
1910   if (dump_enabled_p ())
1911     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1912                      "\n");
1913
1914   FOR_EACH_VEC_ELT (slp_instances, i, instance)
1915     {
1916       /* FORNOW: SLP if you can.  */
1917       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1918         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1919
1920       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
1921          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1922          loop-based vectorization.  Such stmts will be marked as HYBRID.  */
1923       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1924       decided_to_slp++;
1925     }
1926
1927   LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1928
1929   if (decided_to_slp && dump_enabled_p ())
1930     dump_printf_loc (MSG_NOTE, vect_location,
1931                      "Decided to SLP %d instances. Unrolling factor %d\n",
1932                      decided_to_slp, unrolling_factor);
1933
1934   return (decided_to_slp > 0);
1935 }
1936
1937
1938 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1939    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
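     /* For example (purely illustrative): when an SLP-vectorized def is also
        consumed by a relevant non-SLP stmt of the loop, the def must be
        available both as a vector and per loop-vectorized copy, so the
        producing stmt is marked HYBRID.  */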
1940
1941 static void
1942 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
1943 {
1944   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
1945   imm_use_iterator imm_iter;
1946   gimple *use_stmt;
1947   stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
1948   slp_tree child;
1949   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1950   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1951   int j;
1952
1953   /* Propagate hybrid down the SLP tree.  */
1954   if (stype == hybrid)
1955     ;
1956   else if (HYBRID_SLP_STMT (stmt_vinfo))
1957     stype = hybrid;
1958   else
1959     {
1960       /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
1961       gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
1962       /* We always get the pattern stmt here, but for immediate
1963          uses we have to use the LHS of the original stmt.  */
1964       gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1965       if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
1966         stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1967       if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1968         FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1969           {
1970             if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1971               continue;
1972             use_vinfo = vinfo_for_stmt (use_stmt);
1973             if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
1974                 && STMT_VINFO_RELATED_STMT (use_vinfo))
1975               use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
1976             if (!STMT_SLP_TYPE (use_vinfo)
1977                 && (STMT_VINFO_RELEVANT (use_vinfo)
1978                     || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
1979                 && !(gimple_code (use_stmt) == GIMPLE_PHI
1980                      && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
1981               {
1982                 if (dump_enabled_p ())
1983                   {
1984                     dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
1985                                      "def in non-SLP stmt: ");
1986                     dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
1987                   }
1988                 stype = hybrid;
1989               }
1990           }
1991     }
1992
1993   if (stype == hybrid
1994       && !HYBRID_SLP_STMT (stmt_vinfo))
1995     {
1996       if (dump_enabled_p ())
1997         {
1998           dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
1999           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2000         }
2001       STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2002     }
2003
2004   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2005     if (child)
2006       vect_detect_hybrid_slp_stmts (child, i, stype);
2007 }
2008
2009 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */
2010
2011 static tree
2012 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2013 {
2014   walk_stmt_info *wi = (walk_stmt_info *)data;
2015   struct loop *loopp = (struct loop *)wi->info;
2016
2017   if (wi->is_lhs)
2018     return NULL_TREE;
2019
2020   if (TREE_CODE (*tp) == SSA_NAME
2021       && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2022     {
2023       gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2024       if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2025           && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2026         {
2027           if (dump_enabled_p ())
2028             {
2029               dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2030               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2031             }
2032           STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2033         }
2034     }
2035
2036   return NULL_TREE;
2037 }
2038
2039 static tree
2040 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2041                           walk_stmt_info *)
2042 {
2043   /* If the stmt is in an SLP instance then this isn't a reason
2044      to mark use definitions in other SLP instances as hybrid.  */
2045   if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2046     *handled = true;
2047   return NULL_TREE;
2048 }
2049
2050 /* Find stmts that must be both vectorized and SLPed.  */
2051
2052 void
2053 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2054 {
2055   unsigned int i;
2056   vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2057   slp_instance instance;
2058
2059   if (dump_enabled_p ())
2060     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2061                      "\n");
2062
2063   /* First walk all pattern stmts in the loop and mark defs of their uses
2064      as hybrid, because immediate uses in pattern stmts are not recorded.  */
2065   for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2066     {
2067       basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2068       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2069            gsi_next (&gsi))
2070         {
2071           gimple *stmt = gsi_stmt (gsi);
2072           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2073           if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2074             {
2075               walk_stmt_info wi;
2076               memset (&wi, 0, sizeof (wi));
2077               wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2078               gimple_stmt_iterator gsi2
2079                 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2080               walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2081                                 vect_detect_hybrid_slp_1, &wi);
2082               walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2083                                vect_detect_hybrid_slp_2,
2084                                vect_detect_hybrid_slp_1, &wi);
2085             }
2086         }
2087     }
2088
2089   /* Then walk the SLP instance trees marking stmts with uses in
2090      non-SLP stmts as hybrid, also propagating hybrid down the
2091      SLP tree, collecting the above info on-the-fly.  */
2092   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2093     {
2094       for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2095         vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2096                                       i, pure_slp);
2097     }
2098 }
2099
2100
2101 /* Create and initialize a new bb_vec_info struct for BB, as well as
2102    stmt_vec_info structs for all the stmts in it.  */
2103
2104 static bb_vec_info
2105 new_bb_vec_info (basic_block bb)
2106 {
2107   bb_vec_info res = NULL;
2108   gimple_stmt_iterator gsi;
2109
2110   res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2111   res->kind = vec_info::bb;
2112   BB_VINFO_BB (res) = bb;
2113
2114   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2115     {
2116       gimple *stmt = gsi_stmt (gsi);
2117       gimple_set_uid (stmt, 0);
2118       set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
2119     }
2120
2121   BB_VINFO_GROUPED_STORES (res).create (10);
2122   BB_VINFO_SLP_INSTANCES (res).create (2);
2123   BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
2124
2125   bb->aux = res;
2126   return res;
2127 }
2128
2129
2130 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2131    stmts in the basic block.  */
2132
2133 static void
2134 destroy_bb_vec_info (bb_vec_info bb_vinfo)
2135 {
2136   vec<slp_instance> slp_instances;
2137   slp_instance instance;
2138   basic_block bb;
2139   gimple_stmt_iterator si;
2140   unsigned i;
2141
2142   if (!bb_vinfo)
2143     return;
2144
2145   bb = BB_VINFO_BB (bb_vinfo);
2146
2147   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2148     {
2149       gimple *stmt = gsi_stmt (si);
2150       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2151
2152       if (stmt_info)
2153         /* Free stmt_vec_info.  */
2154         free_stmt_vec_info (stmt);
2155     }
2156
2157   vect_destroy_datarefs (bb_vinfo);
2158   free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2159   BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2160   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2161   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2162     vect_free_slp_instance (instance);
2163   BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2164   destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
2165   free (bb_vinfo);
2166   bb->aux = NULL;
2167 }
2168
2169
2170 /* Analyze statements contained in SLP tree node after recursively analyzing
2171    the subtree. Return TRUE if the operations are supported.  */
2172
2173 static bool
2174 vect_slp_analyze_node_operations (slp_tree node)
2175 {
2176   bool dummy;
2177   int i;
2178   gimple *stmt;
2179   slp_tree child;
2180
2181   if (!node)
2182     return true;
2183
2184   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2185     if (!vect_slp_analyze_node_operations (child))
2186       return false;
2187
2188   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2189     {
2190       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2191       gcc_assert (stmt_info);
2192       gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2193
2194       if (!vect_analyze_stmt (stmt, &dummy, node))
2195         return false;
2196     }
2197
2198   return true;
2199 }
2200
2201
2202 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
2203    operations are supported. */
2204
2205 bool
2206 vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
2207 {
2208   slp_instance instance;
2209   int i;
2210
2211   if (dump_enabled_p ())
2212     dump_printf_loc (MSG_NOTE, vect_location,
2213                      "=== vect_slp_analyze_operations ===\n");
2214
2215   for (i = 0; slp_instances.iterate (i, &instance); )
2216     {
2217       if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
2218         {
2219           dump_printf_loc (MSG_NOTE, vect_location,
2220                            "removing SLP instance operations starting from: ");
2221           dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2222                             SLP_TREE_SCALAR_STMTS
2223                               (SLP_INSTANCE_TREE (instance))[0], 0);
2224           vect_free_slp_instance (instance);
2225           slp_instances.ordered_remove (i);
2226         }
2227       else
2228         {
2229           /* Compute the costs of the SLP instance.  */
2230           vect_analyze_slp_cost (instance, data);
2231           i++;
2232         }
2233     }
2234
2235   if (!slp_instances.length ())
2236     return false;
2237
2238   return true;
2239 }
2240
2241
2242 /* Compute the scalar cost of the SLP node NODE and its children
2243    and return it.  Do not account defs that are marked in LIFE and
2244    update LIFE according to uses of NODE.  */
2245
2246 static unsigned
2247 vect_bb_slp_scalar_cost (basic_block bb,
2248                          slp_tree node, vec<bool, va_heap> *life)
2249 {
2250   unsigned scalar_cost = 0;
2251   unsigned i;
2252   gimple *stmt;
2253   slp_tree child;
2254
2255   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2256     {
2257       unsigned stmt_cost;
2258       ssa_op_iter op_iter;
2259       def_operand_p def_p;
2260       stmt_vec_info stmt_info;
2261
2262       if ((*life)[i])
2263         continue;
2264
2265       /* If there is a non-vectorized use of the defs then the scalar
2266          stmt is kept live in which case we do not account it or any
2267          required defs in the SLP children in the scalar cost.  This
2268          way we make the vectorization more costly when compared to
2269          the scalar cost.  */
2270       FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2271         {
2272           imm_use_iterator use_iter;
2273           gimple *use_stmt;
2274           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2275             if (!is_gimple_debug (use_stmt)
2276                 && (gimple_code (use_stmt) == GIMPLE_PHI
2277                     || gimple_bb (use_stmt) != bb
2278                     || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
2279               {
2280                 (*life)[i] = true;
2281                 BREAK_FROM_IMM_USE_STMT (use_iter);
2282               }
2283         }
2284       if ((*life)[i])
2285         continue;
2286
2287       stmt_info = vinfo_for_stmt (stmt);
2288       if (STMT_VINFO_DATA_REF (stmt_info))
2289         {
2290           if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2291             stmt_cost = vect_get_stmt_cost (scalar_load);
2292           else
2293             stmt_cost = vect_get_stmt_cost (scalar_store);
2294         }
2295       else
2296         stmt_cost = vect_get_stmt_cost (scalar_stmt);
2297
2298       scalar_cost += stmt_cost;
2299     }
2300
2301   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2302     if (child)
2303       scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
2304
2305   return scalar_cost;
2306 }
2307
2308 /* Check if vectorization of the basic block is profitable.  */
2309
2310 static bool
2311 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2312 {
2313   vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2314   slp_instance instance;
2315   int i;
2316   unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2317   unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2318
2319   /* Calculate scalar cost.  */
2320   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2321     {
2322       auto_vec<bool, 20> life;
2323       life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2324       scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2325                                               SLP_INSTANCE_TREE (instance),
2326                                               &life);
2327     }
2328
2329   /* Complete the target-specific cost calculation.  */
2330   finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2331                &vec_inside_cost, &vec_epilogue_cost);
2332
2333   vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
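       /* Plugging in made-up numbers: vec_prologue_cost = 4 and
          vec_epilogue_cost = 0 give vec_outside_cost = 4; together with
          vec_inside_cost = 8 this beats a scalar_cost of 16, so the check
          below would declare vectorization profitable.  */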
2334
2335   if (dump_enabled_p ())
2336     {
2337       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2338       dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
2339                    vec_inside_cost);
2340       dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
2341       dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
2342       dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
2343     }
2344
2345   /* Vectorization is profitable if its cost is less than the cost of scalar
2346      version.  */
2347   if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2348     return false;
2349
2350   return true;
2351 }
2352
2353 /* Check if the basic block can be vectorized.  */
2354
2355 static bb_vec_info
2356 vect_slp_analyze_bb_1 (basic_block bb)
2357 {
2358   bb_vec_info bb_vinfo;
2359   vec<slp_instance> slp_instances;
2360   slp_instance instance;
2361   int i;
2362   int min_vf = 2;
2363   unsigned n_stmts = 0;
2364
2365   bb_vinfo = new_bb_vec_info (bb);
2366   if (!bb_vinfo)
2367     return NULL;
2368
2369   if (!vect_analyze_data_refs (bb_vinfo, &min_vf, &n_stmts))
2370     {
2371       if (dump_enabled_p ())
2372         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2373                          "not vectorized: unhandled data-ref in basic "
2374                          "block.\n");
2375
2376       destroy_bb_vec_info (bb_vinfo);
2377       return NULL;
2378     }
2379
2380   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2381     {
2382       if (dump_enabled_p ())
2383         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2384                          "not vectorized: not enough data-refs in "
2385                          "basic block.\n");
2386
2387       destroy_bb_vec_info (bb_vinfo);
2388       return NULL;
2389     }
2390
2391   if (!vect_analyze_data_ref_accesses (bb_vinfo))
2392     {
2393       if (dump_enabled_p ())
2394         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2395                          "not vectorized: unhandled data access in "
2396                          "basic block.\n");
2397
2398       destroy_bb_vec_info (bb_vinfo);
2399       return NULL;
2400     }
2401
2402   vect_pattern_recog (bb_vinfo);
2403
2404   if (!vect_analyze_data_refs_alignment (bb_vinfo))
2405     {
2406       if (dump_enabled_p ())
2407         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2408                          "not vectorized: bad data alignment in basic "
2409                          "block.\n");
2410
2411       destroy_bb_vec_info (bb_vinfo);
2412       return NULL;
2413     }
2414
2415   /* Check the SLP opportunities in the basic block, analyze and build SLP
2416      trees.  */
2417   if (!vect_analyze_slp (bb_vinfo, n_stmts))
2418     {
2419       if (dump_enabled_p ())
2420         {
2421           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2422                            "Failed to SLP the basic block.\n");
2423           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 
2424                            "not vectorized: failed to find SLP opportunities "
2425                            "in basic block.\n");
2426         }
2427
2428       destroy_bb_vec_info (bb_vinfo);
2429       return NULL;
2430     }
2431
2432   slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2433
2434   /* Mark all the statements that we want to vectorize as pure SLP and
2435      relevant.  */
2436   FOR_EACH_VEC_ELT (slp_instances, i, instance)
2437     {
2438       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2439       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2440     }
2441
2442   /* Mark all the statements that we do not want to vectorize.  */
2443   for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2444        !gsi_end_p (gsi); gsi_next (&gsi))
2445     {
2446       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2447       if (STMT_SLP_TYPE (vinfo) != pure_slp)
2448         STMT_VINFO_VECTORIZABLE (vinfo) = false;
2449     }
2450
2451   /* Analyze dependences.  At this point all stmts not participating in
2452      vectorization have to be marked.  Dependence analysis assumes
2453      that we either vectorize all SLP instances or none at all.  */
2454   if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2455     {
2456       if (dump_enabled_p ())
2457         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2458                          "not vectorized: unhandled data dependence "
2459                          "in basic block.\n");
2460
2461       destroy_bb_vec_info (bb_vinfo);
2462       return NULL;
2463     }
2464
2465   if (!vect_verify_datarefs_alignment (bb_vinfo))
2466     {
2467       if (dump_enabled_p ())
2468         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2469                          "not vectorized: unsupported alignment in basic "
2470                          "block.\n");
2471       destroy_bb_vec_info (bb_vinfo);
2472       return NULL;
2473     }
2474
2475   if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2476                                     BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
2477     {
2478       if (dump_enabled_p ())
2479         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480                          "not vectorized: bad operation in basic block.\n");
2481
2482       destroy_bb_vec_info (bb_vinfo);
2483       return NULL;
2484     }
2485
2486   /* Cost model: check if the vectorization is worthwhile.  */
2487   if (!unlimited_cost_model (NULL)
2488       && !vect_bb_vectorization_profitable_p (bb_vinfo))
2489     {
2490       if (dump_enabled_p ())
2491         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492                          "not vectorized: vectorization is not "
2493                          "profitable.\n");
2494
2495       destroy_bb_vec_info (bb_vinfo);
2496       return NULL;
2497     }
2498
2499   if (dump_enabled_p ())
2500     dump_printf_loc (MSG_NOTE, vect_location,
2501                      "Basic block will be vectorized using SLP\n");
2502
2503   return bb_vinfo;
2504 }
2505
2506
2507 bb_vec_info
2508 vect_slp_analyze_bb (basic_block bb)
2509 {
2510   bb_vec_info bb_vinfo;
2511   int insns = 0;
2512   gimple_stmt_iterator gsi;
2513   unsigned int vector_sizes;
2514
2515   if (dump_enabled_p ())
2516     dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
2517
2518   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2519     {
2520       gimple *stmt = gsi_stmt (gsi);
2521       if (!is_gimple_debug (stmt)
2522           && !gimple_nop_p (stmt)
2523           && gimple_code (stmt) != GIMPLE_LABEL)
2524         insns++;
2525     }
2526
2527   if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2528     {
2529       if (dump_enabled_p ())
2530         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2531                          "not vectorized: too many instructions in "
2532                          "basic block.\n");
2533
2534       return NULL;
2535     }
2536
2537   /* Autodetect first vector size we try.  */
2538   current_vector_size = 0;
2539   vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
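       /* The target hook returns a bitmask of supported vector sizes; e.g.
          a hypothetical target supporting 16- and 32-byte vectors returns
          16 | 32.  The loop below analyzes with the autodetected size first
          and retries with the largest remaining size after each failure.  */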
2540
2541   while (1)
2542     {
2543       bb_vinfo = vect_slp_analyze_bb_1 (bb);
2544       if (bb_vinfo)
2545         return bb_vinfo;
2546
2547       destroy_bb_vec_info (bb_vinfo);
2548
2549       vector_sizes &= ~current_vector_size;
2550       if (vector_sizes == 0
2551           || current_vector_size == 0)
2552         return NULL;
2553
2554       /* Try the next biggest vector size.  */
2555       current_vector_size = 1 << floor_log2 (vector_sizes);
2556       if (dump_enabled_p ())
2557         dump_printf_loc (MSG_NOTE, vect_location,
2558                          "***** Re-trying analysis with "
2559                          "vector size %d\n", current_vector_size);
2560     }
2561 }
2562
2563
2564 /* For constant and loop invariant defs of SLP_NODE this function returns
2565    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2566    OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2567    scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
2568    REDUC_INDEX is the index of the reduction operand in the statements, or
2569    -1 if there is no reduction operand.  */
2570
2571 static void
2572 vect_get_constant_vectors (tree op, slp_tree slp_node,
2573                            vec<tree> *vec_oprnds,
2574                            unsigned int op_num, unsigned int number_of_vectors,
2575                            int reduc_index)
2576 {
2577   vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2578   gimple *stmt = stmts[0];
2579   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2580   unsigned nunits;
2581   tree vec_cst;
2582   tree *elts;
2583   unsigned j, number_of_places_left_in_vector;
2584   tree vector_type;
2585   tree vop;
2586   int group_size = stmts.length ();
2587   unsigned int vec_num, i;
2588   unsigned number_of_copies = 1;
2589   vec<tree> voprnds;
2590   voprnds.create (number_of_vectors);
2591   bool constant_p, is_store;
2592   tree neutral_op = NULL;
2593   enum tree_code code = gimple_expr_code (stmt);
2594   gimple *def_stmt;
2595   struct loop *loop;
2596   gimple_seq ctor_seq = NULL;
2597
2598   vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2599   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2600
2601   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2602       && reduc_index != -1)
2603     {
2604       op_num = reduc_index;
2605       op = gimple_op (stmt, op_num + 1);
2606       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2607          we need either neutral operands or the original operands.  See
2608          get_initial_def_for_reduction() for details.  */
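           /* E.g. (illustrative): a PLUS_EXPR reduction that needs two
              vector copies seeds the extra copy with the neutral element 0,
              so the final reduction result is unchanged.  */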
2609       switch (code)
2610         {
2611           case WIDEN_SUM_EXPR:
2612           case DOT_PROD_EXPR:
2613           case SAD_EXPR:
2614           case PLUS_EXPR:
2615           case MINUS_EXPR:
2616           case BIT_IOR_EXPR:
2617           case BIT_XOR_EXPR:
2618              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2619                neutral_op = build_real (TREE_TYPE (op), dconst0);
2620              else
2621                neutral_op = build_int_cst (TREE_TYPE (op), 0);
2622
2623              break;
2624
2625           case MULT_EXPR:
2626              if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2627                neutral_op = build_real (TREE_TYPE (op), dconst1);
2628              else
2629                neutral_op = build_int_cst (TREE_TYPE (op), 1);
2630
2631              break;
2632
2633           case BIT_AND_EXPR:
2634             neutral_op = build_int_cst (TREE_TYPE (op), -1);
2635             break;
2636
2637           /* For MIN/MAX we don't have an easy neutral operand but
2638              the initial values can be used fine here.  Only for
2639              a reduction chain do we have to force a neutral element.  */
2640           case MAX_EXPR:
2641           case MIN_EXPR:
2642             if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2643               neutral_op = NULL;
2644             else
2645               {
2646                 def_stmt = SSA_NAME_DEF_STMT (op);
2647                 loop = (gimple_bb (stmt))->loop_father;
2648                 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2649                                                     loop_preheader_edge (loop));
2650               }
2651             break;
2652
2653           default:
2654             gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
2655             neutral_op = NULL;
2656         }
2657     }
2658
2659   if (STMT_VINFO_DATA_REF (stmt_vinfo))
2660     {
2661       is_store = true;
2662       op = gimple_assign_rhs1 (stmt);
2663     }
2664   else
2665     is_store = false;
2666
2667   gcc_assert (op);
2668
2669   if (CONSTANT_CLASS_P (op))
2670     constant_p = true;
2671   else
2672     constant_p = false;
2673
2674   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2675      created vectors. It is greater than 1 if unrolling is performed.
2676
2677      For example, we have two scalar operands, s1 and s2 (e.g., group of
2678      strided accesses of size two), while NUNITS is four (i.e., four scalars
2679      of this type can be packed in a vector).  The output vector will contain
2680      two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
2681      will be 2).
2682
2683      If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2684      containing the operands.
2685
2686      For example, NUNITS is four as before, and the group size is 8
2687      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
2688      {s5, s6, s7, s8}.  */
2689
2690   number_of_copies = nunits * number_of_vectors / group_size;
2691
2692   number_of_places_left_in_vector = nunits;
2693   elts = XALLOCAVEC (tree, nunits);
2694   bool place_after_defs = false;
2695   for (j = 0; j < number_of_copies; j++)
2696     {
2697       for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
2698         {
2699           if (is_store)
2700             op = gimple_assign_rhs1 (stmt);
2701           else
2702             {
2703               switch (code)
2704                 {
2705                   case COND_EXPR:
2706                     if (op_num == 0 || op_num == 1)
2707                       {
2708                         tree cond = gimple_assign_rhs1 (stmt);
2709                         op = TREE_OPERAND (cond, op_num);
2710                       }
2711                     else
2712                       {
2713                         if (op_num == 2)
2714                           op = gimple_assign_rhs2 (stmt);
2715                         else
2716                           op = gimple_assign_rhs3 (stmt);
2717                       }
2718                     break;
2719
2720                   case CALL_EXPR:
2721                     op = gimple_call_arg (stmt, op_num);
2722                     break;
2723
2724                   case LSHIFT_EXPR:
2725                   case RSHIFT_EXPR:
2726                   case LROTATE_EXPR:
2727                   case RROTATE_EXPR:
2728                     op = gimple_op (stmt, op_num + 1);
2729                     /* Unlike the other binary operators, shifts/rotates have
2730                        the shift count being int, instead of the same type as
2731                        the lhs, so make sure the scalar is the right type if
2732                        we are dealing with vectors of
2733                        long long/long/short/char.  */
2734                     if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
2735                       op = fold_convert (TREE_TYPE (vector_type), op);
2736                     break;
2737
2738                   default:
2739                     op = gimple_op (stmt, op_num + 1);
2740                     break;
2741                 }
2742             }
2743
2744           if (reduc_index != -1)
2745             {
2746               loop = (gimple_bb (stmt))->loop_father;
2747               def_stmt = SSA_NAME_DEF_STMT (op);
2748
2749               gcc_assert (loop);
2750
2751               /* Get the def before the loop.  In reduction chain we have only
2752                  one initial value.  */
2753               if ((j != (number_of_copies - 1)
2754                    || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2755                        && i != 0))
2756                   && neutral_op)
2757                 op = neutral_op;
2758               else
2759                 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2760                                             loop_preheader_edge (loop));
2761             }
2762
2763           /* Create 'vect_ = {op0,op1,...,opn}'.  */
2764           number_of_places_left_in_vector--;
2765           tree orig_op = op;
2766           if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
2767             {
2768               if (CONSTANT_CLASS_P (op))
2769                 {
2770                   op = fold_unary (VIEW_CONVERT_EXPR,
2771                                    TREE_TYPE (vector_type), op);
2772                   gcc_assert (op && CONSTANT_CLASS_P (op));
2773                 }
2774               else
2775                 {
2776                   tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
2777                   gimple *init_stmt;
2778                   op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
2779                   init_stmt
2780                     = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
2781                   gimple_seq_add_stmt (&ctor_seq, init_stmt);
2782                   op = new_temp;
2783                 }
2784             }
2785           elts[number_of_places_left_in_vector] = op;
2786           if (!CONSTANT_CLASS_P (op))
2787             constant_p = false;
2788           if (TREE_CODE (orig_op) == SSA_NAME
2789               && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2790               && STMT_VINFO_BB_VINFO (stmt_vinfo)
2791               && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2792                   == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2793             place_after_defs = true;
2794
2795           if (number_of_places_left_in_vector == 0)
2796             {
2797               number_of_places_left_in_vector = nunits;
2798
2799               if (constant_p)
2800                 vec_cst = build_vector (vector_type, elts);
2801               else
2802                 {
2803                   vec<constructor_elt, va_gc> *v;
2804                   unsigned k;
2805                   vec_alloc (v, nunits);
2806                   for (k = 0; k < nunits; ++k)
2807                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2808                   vec_cst = build_constructor (vector_type, v);
2809                 }
2810               tree init;
2811               gimple_stmt_iterator gsi;
2812               if (place_after_defs)
2813                 {
2814                   gsi = gsi_for_stmt
2815                           (vect_find_last_scalar_stmt_in_slp (slp_node));
2816                   init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2817                 }
2818               else
2819                 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
2820               if (ctor_seq != NULL)
2821                 {
2822                   gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
2823                   gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2824                                                         GSI_SAME_STMT);
2825                   ctor_seq = NULL;
2826                 }
2827               voprnds.quick_push (init);
2828               place_after_defs = false;
2829             }
2830         }
2831     }
2832
2833   /* Since the vectors are created in reverse order, invert them here so
2834      that VEC_OPRNDS receives them in the original order.  */
2835   vec_num = voprnds.length ();
2836   for (j = vec_num; j != 0; j--)
2837     {
2838       vop = voprnds[j - 1];
2839       vec_oprnds->quick_push (vop);
2840     }
2841
2842   voprnds.release ();
2843
2844   /* If VF is greater than the unrolling factor needed for the SLP group
2845      of stmts, NUMBER_OF_VECTORS to be created is greater than
2846      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we
2847      have to replicate the vectors.  */
2848   while (number_of_vectors > vec_oprnds->length ())
2849     {
2850       tree neutral_vec = NULL;
2851
2852       if (neutral_op)
2853         {
2854           if (!neutral_vec)
2855             neutral_vec = build_vector_from_val (vector_type, neutral_op);
2856
2857           vec_oprnds->quick_push (neutral_vec);
2858         }
2859       else
2860         {
2861           for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2862             vec_oprnds->quick_push (vop);
2863         }
2864     }
2865 }
2866
2867
2868 /* Get the vectorized definitions from SLP_NODE, which contains the
2869    corresponding vectorized def-stmts.  */
2870
2871 static void
2872 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
2873 {
2874   tree vec_oprnd;
2875   gimple *vec_def_stmt;
2876   unsigned int i;
2877
2878   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
2879
2880   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2881     {
2882       gcc_assert (vec_def_stmt);
2883       vec_oprnd = gimple_get_lhs (vec_def_stmt);
2884       vec_oprnds->quick_push (vec_oprnd);
2885     }
2886 }
2887
2888
2889 /* Get vectorized definitions for SLP_NODE.
2890    If the scalar definitions are loop invariants or constants, collect them and
2891    call vect_get_constant_vectors() to create vector stmts.
2892    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2893    must be stored in the corresponding child of SLP_NODE, and we call
2894    vect_get_slp_vect_defs () to retrieve them.  */
2895
2896 void
2897 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
2898                    vec<vec<tree> > *vec_oprnds, int reduc_index)
2899 {
2900   gimple *first_stmt;
2901   int number_of_vects = 0, i;
2902   unsigned int child_index = 0;
2903   HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2904   slp_tree child = NULL;
2905   vec<tree> vec_defs;
2906   tree oprnd;
2907   bool vectorized_defs;
2908
2909   first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2910   FOR_EACH_VEC_ELT (ops, i, oprnd)
2911     {
2912       /* For each operand we check if it has vectorized definitions in a child
2913          node or we need to create them (for invariants and constants).  We
2914          check if the LHS of the first stmt of the next child matches OPRND.
2915          If it does, we found the correct child.  Otherwise, we call
2916          vect_get_constant_vectors () and do not advance CHILD_INDEX, so
2917          that this child node is checked again for the next operand.  */
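           /* Sketch (hypothetical operands): an SSA operand defined by the
              first stmt of the current child takes its defs from that
              child's vectorized stmts, while a constant operand such as 5
              falls through to vect_get_constant_vectors () with CHILD_INDEX
              kept in place.  */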
2918       vectorized_defs = false;
2919       if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
2920         {
2921           child = SLP_TREE_CHILDREN (slp_node)[child_index];
2922
2923           /* We have to check both pattern and original def, if available.  */
2924           if (child)
2925             {
2926               gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2927               gimple *related
2928                 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2929
2930               if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2931                   || (related
2932                       && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2933                 {
2934                   /* The number of vector defs is determined by the number of
2935                      vector statements in the node from which we get those
2936                      statements.  */
2937                   number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2938                   vectorized_defs = true;
2939                   child_index++;
2940                 }
2941             }
2942           else
2943             child_index++;
2944         }
2945
2946       if (!vectorized_defs)
2947         {
2948           if (i == 0)
2949             {
2950               number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2951               /* Number of vector stmts was calculated according to LHS in
2952                  vect_schedule_slp_instance (), fix it by replacing LHS with
2953                  RHS, if necessary.  See vect_get_smallest_scalar_type () for
2954                  details.  */
2955               vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2956                                              &rhs_size_unit);
2957               if (rhs_size_unit != lhs_size_unit)
2958                 {
2959                   number_of_vects *= rhs_size_unit;
2960                   number_of_vects /= lhs_size_unit;
2961                 }
2962             }
2963         }
2964
2965       /* Allocate memory for vectorized defs.  */
2966       vec_defs = vNULL;
2967       vec_defs.create (number_of_vects);
2968
2969       /* For reduction defs we call vect_get_constant_vectors (), since we are
2970          looking for initial loop invariant values.  */
2971       if (vectorized_defs && reduc_index == -1)
2972         /* The defs are already vectorized.  */
2973         vect_get_slp_vect_defs (child, &vec_defs);
2974       else
2975         /* Build vectors from scalar defs.  */
2976         vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2977                                    number_of_vects, reduc_index);
2978
2979       vec_oprnds->quick_push (vec_defs);
2980
2981       /* For reductions, we only need initial values.  */
2982       if (reduc_index != -1)
2983         return;
2984     }
2985 }
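
/* For illustration, take an SLP node whose scalar stmts are (hypothetical
   GIMPLE)
     _3 = _1 + 1;
     _4 = _2 + 1;
   with _1 and _2 defined by a load child.  Operand _1 matches the LHS of the
   child's first stmt, so the vectorized defs are fetched with
   vect_get_slp_vect_defs (), while the constant operand 1 has no child and
   the vectors {1, 1, ...} are built by vect_get_constant_vectors ().  */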
2986
2987
2988 /* Create NCOPIES permutation statements using the vector constant MASK and
2989    two input vectors placed in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX
2990    for the first copy, shifting by STRIDE elements of DR_CHAIN for every
2991    further copy.
2992    (STRIDE is the number of vectorized stmts for NODE divided by the number of
2993    copies).
2994    VECT_STMTS_COUNTER specifies the slot in the vectorized stmts of NODE in
2995    which the first created stmt is stored.  */
2996
2997 static inline void
2998 vect_create_mask_and_perm (gimple *stmt,
2999                            tree mask, int first_vec_indx, int second_vec_indx,
3000                            gimple_stmt_iterator *gsi, slp_tree node,
3001                            tree vectype, vec<tree> dr_chain,
3002                            int ncopies, int vect_stmts_counter)
3003 {
3004   tree perm_dest;
3005   gimple *perm_stmt = NULL;
3006   int i, stride;
3007   tree first_vec, second_vec, data_ref;
3008
3009   stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3010
3011   /* Initialize the vect stmts of NODE to properly insert the generated
3012      stmts later.  */
3013   for (i = SLP_TREE_VEC_STMTS (node).length ();
3014        i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3015     SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3016
3017   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3018   for (i = 0; i < ncopies; i++)
3019     {
3020       first_vec = dr_chain[first_vec_indx];
3021       second_vec = dr_chain[second_vec_indx];
3022
3023       /* Generate the permute statement.  */
3024       perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3025                                        first_vec, second_vec, mask);
3026       data_ref = make_ssa_name (perm_dest, perm_stmt);
3027       gimple_set_lhs (perm_stmt, data_ref);
3028       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3029
3030       /* Store the vector statement in NODE.  */
3031       SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
3032
3033       first_vec_indx += stride;
3034       second_vec_indx += stride;
3035     }
3036 }
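
/* E.g. (an assumed example), with NCOPIES == 2 and
   SLP_TREE_NUMBER_OF_VEC_STMTS (NODE) == 6, STRIDE is 3: the first copy
   permutes DR_CHAIN[FIRST_VEC_INDX] and DR_CHAIN[SECOND_VEC_INDX] into slot
   VECT_STMTS_COUNTER, and the second copy permutes
   DR_CHAIN[FIRST_VEC_INDX + 3] and DR_CHAIN[SECOND_VEC_INDX + 3] into slot
   3 + VECT_STMTS_COUNTER.  */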
3037
3038
3039 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
3040    return in CURRENT_MASK_ELEMENT its equivalent in target specific
3041    representation.  Check that the mask is valid and return FALSE if not.
3042    Set NEED_NEXT_VECTOR to TRUE if the permutation requires moving to the
3043    next vector, i.e., the current first vector is not needed anymore.  */
3044
3045 static bool
3046 vect_get_mask_element (gimple *stmt, int first_mask_element, int m,
3047                        int mask_nunits, bool only_one_vec, int index,
3048                        unsigned char *mask, int *current_mask_element,
3049                        bool *need_next_vector, int *number_of_mask_fixes,
3050                        bool *mask_fixed, bool *needs_first_vector)
3051 {
3052   int i;
3053
3054   /* Convert to target specific representation.  */
3055   *current_mask_element = first_mask_element + m;
3056   /* Adjust the value in case it's a mask for second and third vectors.  */
3057   *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
3058
3059   if (*current_mask_element < 0)
3060     {
3061       if (dump_enabled_p ())
3062         {
3063           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3064                            "permutation requires past vector ");
3065           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3066           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3067         }
3068       return false;
3069     }
3070
3071   if (*current_mask_element < mask_nunits)
3072     *needs_first_vector = true;
3073
3074   /* We have only one input vector to permute but the mask accesses values in
3075      the next vector as well.  */
3076   if (only_one_vec && *current_mask_element >= mask_nunits)
3077     {
3078       if (dump_enabled_p ())
3079         {
3080           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3081                            "permutation requires at least two vectors ");
3082           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3083           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3084         }
3085
3086       return false;
3087     }
3088
3089   /* The mask requires the next vector.  */
3090   while (*current_mask_element >= mask_nunits * 2)
3091     {
3092       if (*needs_first_vector || *mask_fixed)
3093         {
3094           /* We either need the first vector too or have already moved to the
3095              next vector. In both cases, this permutation needs three
3096              vectors.  */
3097           if (dump_enabled_p ())
3098             {
3099               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3100                                "permutation requires at "
3101                                "least three vectors ");
3102               dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3103               dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3104             }
3105
3106           return false;
3107         }
3108
3109       /* We move to the next vector, dropping the first one and working with
3110          the second and the third - we need to adjust the values of the mask
3111          accordingly.  */
3112       *current_mask_element -= mask_nunits * *number_of_mask_fixes;
3113
3114       for (i = 0; i < index; i++)
3115         mask[i] -= mask_nunits * *number_of_mask_fixes;
3116
3117       (*number_of_mask_fixes)++;
3118       *mask_fixed = true;
3119     }
3120
3121   *need_next_vector = *mask_fixed;
3122
3123   /* This was the last element of this mask. Start a new one.  */
3124   if (index == mask_nunits - 1)
3125     {
3126       *number_of_mask_fixes = 1;
3127       *mask_fixed = false;
3128       *needs_first_vector = false;
3129     }
3130
3131   return true;
3132 }
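
/* A worked example, assuming MASK_NUNITS == 4: for the a-masks
   {0,0,0,3} {3,3,6,6} {6,9,9,9} discussed in vect_transform_slp_perm_load
   below, element 9 of the last mask is >= 2 * 4, so the first vector is
   dropped: 9 - 4 * NUMBER_OF_MASK_FIXES (== 1) gives 5, the already emitted
   element 6 is reduced to 2 as well, and the mask becomes {2,5,5,5},
   operating on the second and third vectors.  */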
3133
3134
3135 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3136    If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3137    permute statements for the SLP node NODE of the SLP instance
3138    SLP_NODE_INSTANCE.  */
3139
3140 bool
3141 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3142                               gimple_stmt_iterator *gsi, int vf,
3143                               slp_instance slp_node_instance, bool analyze_only)
3144 {
3145   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3146   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3147   tree mask_element_type = NULL_TREE, mask_type;
3148   int i, j, k, nunits, vec_index = 0;
3149   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3150   int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3151   int first_mask_element;
3152   int index, unroll_factor, current_mask_element, ncopies;
3153   unsigned char *mask;
3154   bool only_one_vec = false, need_next_vector = false;
3155   int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
3156   int number_of_mask_fixes = 1;
3157   bool mask_fixed = false;
3158   bool needs_first_vector = false;
3159   machine_mode mode;
3160
3161   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3162     return false;
3163
3164   stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3165
3166   mode = TYPE_MODE (vectype);
3167
3168   if (!can_vec_perm_p (mode, false, NULL))
3169     {
3170       if (dump_enabled_p ())
3171         {
3172           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3173                            "no vect permute for ");
3174           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3175           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3176         }
3177       return false;
3178     }
3179
3180   /* The generic VEC_PERM_EXPR code always uses an integral type of the
3181      same size as the vector element being permuted.  */
3182   mask_element_type = lang_hooks.types.type_for_mode
3183                 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
3184   mask_type = get_vectype_for_scalar_type (mask_element_type);
3185   nunits = TYPE_VECTOR_SUBPARTS (vectype);
3186   mask = XALLOCAVEC (unsigned char, nunits);
3187   unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3188
3189   /* The number of vector stmts to generate based only on the
3190      SLP_NODE_INSTANCE unrolling factor.  */
3191   orig_vec_stmts_num
3192     = (STMT_VINFO_GROUP_SIZE (stmt_info)
3193        * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
3194        + nunits - 1) / nunits;
3195   if (orig_vec_stmts_num == 1)
3196     only_one_vec = true;
3197
3198   /* The number of copies is determined by the final vectorization factor
3199      relative to the SLP_NODE_INSTANCE unrolling factor.  */
3200   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
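
  /* E.g. (an assumed example), with a group size of 3, an unrolling factor
     of 4 and nunits == 4 we get (3 * 4 + 3) / 4 == 3 input vectors above,
     and with vf == 8 each mask computed below is applied
     NCOPIES == 2 times.  */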
3201
3202   /* Generate permutation masks for every NODE.  The number of masks for
3203      each NODE is equal to GROUP_SIZE.
3204      E.g., we have a group of three nodes with three loads from the same
3205      location in each node, and the vector size is 4.  I.e., we have an
3206      a0b0c0a1b1c1... sequence and we need to create the following vectors:
3207      for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3208      for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3209      ...
3210
3211      The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3212      The last mask is illegal since the permute operation takes only two
3213      input vectors, so a mask element may not index past the second vector.
3214      Hence, the last mask must be converted into {2,5,5,5}.
3215      For the first two permutations we need the first and the second input
3216      vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3217      we need the second and the third vectors: {b1,c1,a2,b2} and
3218      {c2,a3,b3,c3}.  */
3219
3220   {
3221       index = 0;
3222       vect_stmts_counter = 0;
3223       vec_index = 0;
3224       first_vec_index = vec_index++;
3225       if (only_one_vec)
3226         second_vec_index = first_vec_index;
3227       else
3228         second_vec_index = vec_index++;
3229
3230       for (j = 0; j < unroll_factor; j++)
3231         {
3232           for (k = 0; k < group_size; k++)
3233             {
3234               i = SLP_TREE_LOAD_PERMUTATION (node)[k];
3235               first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
3236               if (!vect_get_mask_element (stmt, first_mask_element, 0,
3237                                           nunits, only_one_vec, index,
3238                                           mask, &current_mask_element,
3239                                           &need_next_vector,
3240                                           &number_of_mask_fixes, &mask_fixed,
3241                                           &needs_first_vector))
3242                 return false;
3243               gcc_assert (current_mask_element >= 0
3244                           && current_mask_element < 2 * nunits);
3245               mask[index++] = current_mask_element;
3246
3247               if (index == nunits)
3248                 {
3249                   index = 0;
3250                   if (!can_vec_perm_p (mode, false, mask))
3251                     {
3252                       if (dump_enabled_p ())
3253                         {
3254                           dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3255                                            vect_location,
3256                                            "unsupported vect permute { ");
3257                           for (i = 0; i < nunits; ++i)
3258                             dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3259                                          mask[i]);
3260                           dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3261                         }
3262                       return false;
3263                     }
3264
3265                   if (!analyze_only)
3266                     {
3267                       int l;
3268                       tree mask_vec, *mask_elts;
3269                       mask_elts = XALLOCAVEC (tree, nunits);
3270                       for (l = 0; l < nunits; ++l)
3271                         mask_elts[l] = build_int_cst (mask_element_type,
3272                                                       mask[l]);
3273                       mask_vec = build_vector (mask_type, mask_elts);
3274
3275                       if (need_next_vector)
3276                         {
3277                           first_vec_index = second_vec_index;
3278                           second_vec_index = vec_index;
3279                         }
3280
3281                       vect_create_mask_and_perm (stmt,
3282                                mask_vec, first_vec_index, second_vec_index,
3283                                gsi, node, vectype, dr_chain,
3284                                ncopies, vect_stmts_counter++);
3285                     }
3286                 }
3287             }
3288         }
3289   }
3290
3291   return true;
3292 }
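
/* Note that the function above serves two phases: with ANALYZE_ONLY true
   (and an empty DR_CHAIN) it only verifies that every required mask passes
   can_vec_perm_p, while with ANALYZE_ONLY false it also emits the
   VEC_PERM_EXPR stmts via vect_create_mask_and_perm.  */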
3293
3294
3295
3296 /* Vectorize the SLP instance tree in postorder.  */
3297
3298 static bool
3299 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3300                             unsigned int vectorization_factor)
3301 {
3302   gimple *stmt;
3303   bool grouped_store, is_store;
3304   gimple_stmt_iterator si;
3305   stmt_vec_info stmt_info;
3306   unsigned int vec_stmts_size, nunits, group_size;
3307   tree vectype;
3308   int i;
3309   slp_tree child;
3310
3311   if (!node)
3312     return false;
3313
3314   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3315     vect_schedule_slp_instance (child, instance, vectorization_factor);
3316
3317   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3318   stmt_info = vinfo_for_stmt (stmt);
3319
3320   /* VECTYPE is the type of the destination.  */
3321   vectype = STMT_VINFO_VECTYPE (stmt_info);
3322   nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3323   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3324
3325   /* For each SLP instance calculate the number of vector stmts to be created
3326      for the scalar stmts in each node of the SLP tree.  The number of vector
3327      elements in one vector iteration is the number of scalar elements in
3328      one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
3329      size.
3330      Unless this is an SLP reduction, in which case the number of vector
3331      stmts is equal to the number of vector stmts of the children.  */
3332   if (GROUP_FIRST_ELEMENT (stmt_info)
3333       && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3334     vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3335   else
3336     vec_stmts_size = (vectorization_factor * group_size) / nunits;
3337
3338   if (!SLP_TREE_VEC_STMTS (node).exists ())
3339     {
3340       SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
3341       SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3342     }
3343
3344   if (dump_enabled_p ())
3345     {
3346       dump_printf_loc (MSG_NOTE, vect_location,
3347                        "------>vectorizing SLP node starting from: ");
3348       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3349       dump_printf (MSG_NOTE, "\n");
3350     }
3351
3352   /* Vectorized stmts are inserted before the last scalar stmt, which is
3353      where all uses are ready.  */
3354   si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3355
3356   /* Mark the first element of the reduction chain as a reduction to properly
3357      transform the node.  In the analysis phase only the last element of the
3358      chain is marked as a reduction.  */
3359   if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3360       && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3361     {
3362       STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3363       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3364     }
3365
3366   /* Handle two-operation SLP nodes by vectorizing the group with
3367      both operations and then performing a merge.  */
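  /* For illustration (an assumed example), for the two-operator group
       x[0] = a[0] + b[0];
       x[1] = a[1] - b[1];
     the mask computed below is {0, 1}: the node is vectorized once with
     all-plus and once with all-minus, and the merging VEC_PERM_EXPR element
     mask[k] * nunits + l selects lane l from the plus vector where
     mask[k] == 0 and from the minus vector where mask[k] == 1.  */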
3368   if (SLP_TREE_TWO_OPERATORS (node))
3369     {
3370       enum tree_code code0 = gimple_assign_rhs_code (stmt);
3371       enum tree_code ocode = ERROR_MARK;
3372       gimple *ostmt;
3373       unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3374       bool allsame = true;
3375       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3376         if (gimple_assign_rhs_code (ostmt) != code0)
3377           {
3378             mask[i] = 1;
3379             allsame = false;
3380             ocode = gimple_assign_rhs_code (ostmt);
3381           }
3382         else
3383           mask[i] = 0;
3384       if (!allsame)
3385         {
3386           vec<gimple *> v0;
3387           vec<gimple *> v1;
3388           unsigned j;
3389           tree tmask = NULL_TREE;
3390           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3391           v0 = SLP_TREE_VEC_STMTS (node).copy ();
3392           SLP_TREE_VEC_STMTS (node).truncate (0);
3393           gimple_assign_set_rhs_code (stmt, ocode);
3394           vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3395           gimple_assign_set_rhs_code (stmt, code0);
3396           v1 = SLP_TREE_VEC_STMTS (node).copy ();
3397           SLP_TREE_VEC_STMTS (node).truncate (0);
3398           tree meltype = build_nonstandard_integer_type
3399               (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3400           tree mvectype = get_same_sized_vectype (meltype, vectype);
3401           unsigned k = 0, l;
3402           for (j = 0; j < v0.length (); ++j)
3403             {
3404               tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3405               for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3406                 {
3407                   if (k >= group_size)
3408                     k = 0;
3409                   melts[l] = build_int_cst
3410                       (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3411                 }
3412               tmask = build_vector (mvectype, melts);
3413
3414               /* ???  Not all targets support a VEC_PERM_EXPR with a
3415                  constant mask that would translate to a vec_merge RTX
3416                  (with their vec_perm_const_ok).  We can either not
3417                  vectorize in that case or let veclower do its job.
3418                  Unfortunately that isn't too great and at least for
3419                  plus/minus we'd eventually like to match targets
3420                  vector addsub instructions.  */
3421               gimple *vstmt;
3422               vstmt = gimple_build_assign (make_ssa_name (vectype),
3423                                            VEC_PERM_EXPR,
3424                                            gimple_assign_lhs (v0[j]),
3425                                            gimple_assign_lhs (v1[j]), tmask);
3426               vect_finish_stmt_generation (stmt, vstmt, &si);
3427               SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3428             }
3429           v0.release ();
3430           v1.release ();
3431           return false;
3432         }
3433     }
3434   is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3435   return is_store;
3436 }
3437
3438 /* Replace each scalar call in SLP node NODE with a zero assignment to its lhs.
3439    For loop vectorization this is done in vectorizable_call, but for SLP
3440    it needs to be deferred until end of vect_schedule_slp, because multiple
3441    SLP instances may refer to the same scalar stmt.  */
3442
3443 static void
3444 vect_remove_slp_scalar_calls (slp_tree node)
3445 {
3446   gimple *stmt, *new_stmt;
3447   gimple_stmt_iterator gsi;
3448   int i;
3449   slp_tree child;
3450   tree lhs;
3451   stmt_vec_info stmt_info;
3452
3453   if (!node)
3454     return;
3455
3456   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3457     vect_remove_slp_scalar_calls (child);
3458
3459   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3460     {
3461       if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3462         continue;
3463       stmt_info = vinfo_for_stmt (stmt);
3464       if (stmt_info == NULL
3465           || is_pattern_stmt_p (stmt_info)
3466           || !PURE_SLP_STMT (stmt_info))
3467         continue;
3468       lhs = gimple_call_lhs (stmt);
3469       new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3470       set_vinfo_for_stmt (new_stmt, stmt_info);
3471       set_vinfo_for_stmt (stmt, NULL);
3472       STMT_VINFO_STMT (stmt_info) = new_stmt;
3473       gsi = gsi_for_stmt (stmt);
3474       gsi_replace (&gsi, new_stmt, false);
3475       SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3476     }
3477 }
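
/* E.g., a pure-SLP scalar call (sqrtf standing in for any vectorizable
   call)
     _1 = sqrtf (_2);
   is replaced above by
     _1 = 0.0;
   which keeps the SSA form valid; the now dead assignment is expected to be
   removed by later DCE.  */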
3478
3479 /* Generate vector code for all SLP instances in the loop/basic block.  */
3480
3481 bool
3482 vect_schedule_slp (vec_info *vinfo)
3483 {
3484   vec<slp_instance> slp_instances;
3485   slp_instance instance;
3486   unsigned int i, vf;
3487   bool is_store = false;
3488
3489   slp_instances = vinfo->slp_instances;
3490   if (is_a <loop_vec_info> (vinfo))
3491     vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
3492   else
3493     vf = 1;
3494
3495   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3496     {
3497       /* Schedule the tree of INSTANCE.  */
3498       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3499                                              instance, vf);
3500       if (dump_enabled_p ())
3501         dump_printf_loc (MSG_NOTE, vect_location,
3502                          "vectorizing stmts using SLP.\n");
3503     }
3504
3505   FOR_EACH_VEC_ELT (slp_instances, i, instance)
3506     {
3507       slp_tree root = SLP_INSTANCE_TREE (instance);
3508       gimple *store;
3509       unsigned int j;
3510       gimple_stmt_iterator gsi;
3511
3512       /* Remove scalar call stmts.  Do not do this for basic-block
3513          vectorization as not all uses may be vectorized.
3514          ???  Why should this be necessary?  DCE should be able to
3515          remove the stmts itself.
3516          ???  For BB vectorization we can as well remove scalar
3517          stmts starting from the SLP tree root if they have no
3518          uses.  */
3519       if (is_a <loop_vec_info> (vinfo))
3520         vect_remove_slp_scalar_calls (root);
3521
3522       for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3523                   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3524         {
3525           if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3526             break;
3527
3528           if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3529             store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3530           /* Free the attached stmt_vec_info and remove the stmt.  */
3531           gsi = gsi_for_stmt (store);
3532           unlink_stmt_vdef (store);
3533           gsi_remove (&gsi, true);
3534           release_defs (store);
3535           free_stmt_vec_info (store);
3536         }
3537     }
3538
3539   return is_store;
3540 }
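
/* For a vectorized store group such as (an assumed example)
     a[0] = x0; a[1] = x1; a[2] = x2; a[3] = x3;
   the loop above unlinks and releases the GROUP_SIZE scalar stores rooted
   at the instance tree, since the emitted vector store supersedes them;
   other scalar stmts are left for DCE.  */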
3541
3542
3543 /* Vectorize the basic block.  */
3544
3545 void
3546 vect_slp_transform_bb (basic_block bb)
3547 {
3548   bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3549   gimple_stmt_iterator si;
3550
3551   gcc_assert (bb_vinfo);
3552
3553   if (dump_enabled_p ())
3554     dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
3555
3556   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3557     {
3558       gimple *stmt = gsi_stmt (si);
3559       stmt_vec_info stmt_info;
3560
3561       if (dump_enabled_p ())
3562         {
3563           dump_printf_loc (MSG_NOTE, vect_location,
3564                            "------>SLPing statement: ");
3565           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3566           dump_printf (MSG_NOTE, "\n");
3567         }
3568
3569       stmt_info = vinfo_for_stmt (stmt);
3570       gcc_assert (stmt_info);
3571
3572       /* Schedule all the SLP instances when the first SLP stmt is reached.  */
3573       if (STMT_SLP_TYPE (stmt_info))
3574         {
3575           vect_schedule_slp (bb_vinfo);
3576           break;
3577         }
3578     }
3579
3580   if (dump_enabled_p ())
3581     dump_printf_loc (MSG_NOTE, vect_location,
3582                      "BASIC BLOCK VECTORIZED\n");
3583
3584   destroy_bb_vec_info (bb_vinfo);
3585 }