gimple.h: Remove all includes.
[platform/upstream/gcc.git] / gcc / tree-vect-loop.c
index 292e771..c91c2e1 100644 (file)
@@ -24,14 +24,22 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "dumpfile.h"
 #include "tm.h"
-#include "ggc.h"
 #include "tree.h"
+#include "stor-layout.h"
 #include "basic-block.h"
 #include "gimple-pretty-print.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
 #include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "stringpool.h"
 #include "tree-ssanames.h"
 #include "tree-ssa-loop-ivopts.h"
 #include "tree-ssa-loop-manip.h"
@@ -766,11 +774,12 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 }
 
+
 /* Function vect_get_loop_niters.
 
-   Determine how many iterations the loop is executed.
-   If an expression that represents the number of iterations
-   can be constructed, place it in NUMBER_OF_ITERATIONS.
+   Determine how many iterations the loop is executed and place it
+   in NUMBER_OF_ITERATIONS.
+
    Return the loop exit condition.  */
 
 static gimple
@@ -781,20 +790,16 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                     "=== get_loop_niters ===\n");
-  niters = number_of_exit_cond_executions (loop);
 
-  if (niters != NULL_TREE
-      && niters != chrec_dont_know)
-    {
-      *number_of_iterations = niters;
-
-      if (dump_enabled_p ())
-        {
-          dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
-          dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
-          dump_printf (MSG_NOTE, "\n");
-        }
-    }
+  niters = number_of_latch_executions (loop);
+  /* We want the number of loop header executions which is the number
+     of latch executions plus one.
+     ???  For UINT_MAX latch executions this number overflows to zero
+     for loops like do { n++; } while (n != 0);  */
+  if (niters && !chrec_contains_undetermined (niters))
+    niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
+                         build_int_cst (TREE_TYPE (niters), 1));
+  *number_of_iterations = niters;
 
   return get_loop_exit_condition (loop);
 }
@@ -902,7 +907,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
-  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
+  LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
   LOOP_VINFO_VECT_FACTOR (res) = 0;
   LOOP_VINFO_LOOP_NEST (res).create (3);
   LOOP_VINFO_DATAREFS (res).create (10);
@@ -919,6 +924,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
+  LOOP_VINFO_PEELING_FOR_NITER (res) = false;
   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
 
   return res;
@@ -1086,12 +1092,12 @@ vect_analyze_loop_form (struct loop *loop)
         }
 
       if (empty_block_p (loop->header))
-    {
-          if (dump_enabled_p ())
-            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: empty loop.\n");
-      return NULL;
-    }
+         return NULL;
+       }
     }
   else
     {
@@ -1238,7 +1244,8 @@ vect_analyze_loop_form (struct loop *loop)
       return NULL;
     }
 
-  if (!number_of_iterations)
+  if (!number_of_iterations
+      || chrec_contains_undetermined (number_of_iterations))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1249,17 +1256,21 @@ vect_analyze_loop_form (struct loop *loop)
       return NULL;
     }
 
-  if (chrec_contains_undetermined (number_of_iterations))
+  if (integer_zerop (number_of_iterations))
     {
       if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "Infinite number of iterations.\n");
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "not vectorized: number of iterations = 0.\n");
       if (inner_loop_vinfo)
-       destroy_loop_vec_info (inner_loop_vinfo, true);
+        destroy_loop_vec_info (inner_loop_vinfo, true);
       return NULL;
     }
 
-  if (!NITERS_KNOWN_P (number_of_iterations))
+  loop_vinfo = new_loop_vec_info (loop);
+  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
+  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
+
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
     {
       if (dump_enabled_p ())
         {
@@ -1269,19 +1280,6 @@ vect_analyze_loop_form (struct loop *loop)
           dump_printf (MSG_NOTE, "\n");
         }
     }
-  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "not vectorized: number of iterations = 0.\n");
-      if (inner_loop_vinfo)
-        destroy_loop_vec_info (inner_loop_vinfo, true);
-      return NULL;
-    }
-
-  loop_vinfo = new_loop_vec_info (loop);
-  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
-  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
 
   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
 
@@ -1583,28 +1581,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
       return false;
     }
 
-  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
-      || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
-         < exact_log2 (vectorization_factor)))
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.\n");
-      if (!vect_can_advance_ivs_p (loop_vinfo))
-        {
-          if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "not vectorized: can't create epilog loop 1.\n");
-          return false;
-        }
-      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
-        {
-          if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "not vectorized: can't create epilog loop 2.\n");
-          return false;
-        }
-    }
-
   return true;
 }
 
@@ -1760,6 +1736,40 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
       return false;
     }
 
+  /* Decide whether we need to create an epilogue loop to handle
+     remaining scalar iterations.  */
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+    {
+      if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
+                  - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+         < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+       LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+    }
+  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+          || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+              < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+
+  /* If an epilogue loop is required make sure we can create one.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+      || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
+      if (!vect_can_advance_ivs_p (loop_vinfo)
+         || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+                                          single_exit (LOOP_VINFO_LOOP
+                                                        (loop_vinfo))))
+        {
+          if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "not vectorized: can't create required "
+                            "epilog loop\n");
+          return false;
+        }
+    }
+
   return true;
 }
 
@@ -2689,7 +2699,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
   int scalar_single_iter_cost = 0;
   int scalar_outside_cost = 0;
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+  int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
 
   /* Cost model disabled.  */
@@ -2880,7 +2890,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       else
        {
          /* Cost model check occurs at prologue generation.  */
-         if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+         if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
            scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
              + vect_get_stmt_cost (cond_branch_not_taken); 
          /* Cost model check occurs at epilogue generation.  */
@@ -3096,10 +3106,10 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
        }
       else
        {
-         int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+         int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
          tree bitsize =
            TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
-         int element_bitsize = tree_low_cst (bitsize, 1);
+         int element_bitsize = tree_to_uhwi (bitsize);
          int nelements = vec_size_in_bits / element_bitsize;
 
          optab = optab_for_tree_code (code, vectype, optab_default);
@@ -3795,14 +3805,14 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
   bool extract_scalar_result = false;
   gimple use_stmt, orig_stmt, reduction_phi = NULL;
   bool nested_in_vect_loop = false;
-  vec<gimple> new_phis = vNULL;
-  vec<gimple> inner_phis = vNULL;
+  auto_vec<gimple> new_phis;
+  auto_vec<gimple> inner_phis;
   enum vect_def_type dt = vect_unknown_def_type;
   int j, i;
-  vec<tree> scalar_results = vNULL;
+  auto_vec<tree> scalar_results;
   unsigned int group_size = 1, k, ratio;
-  vec<tree> vec_initial_defs = vNULL;
-  vec<gimple> phis;
+  auto_vec<tree> vec_initial_defs;
+  auto_vec<gimple> phis;
   bool slp_reduc = false;
   tree new_phi_result;
   gimple inner_phi = NULL;
@@ -3906,8 +3916,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
         }
     }
 
-  vec_initial_defs.release ();
-
   /* 2. Create epilog code.
         The reduction epilog code operates across the elements of the vector
         of partial results computed by the vectorized loop.
@@ -4112,8 +4120,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
       enum tree_code shift_code = ERROR_MARK;
       bool have_whole_vector_shift = true;
       int bit_offset;
-      int element_bitsize = tree_low_cst (bitsize, 1);
-      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+      int element_bitsize = tree_to_uhwi (bitsize);
+      int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
       tree vec_temp;
 
       if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
@@ -4190,7 +4198,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
             dump_printf_loc (MSG_NOTE, vect_location,
                             "Reduce using scalar code.\n");
 
-          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+          vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
           FOR_EACH_VEC_ELT (new_phis, i, new_phi)
             {
               if (gimple_code (new_phi) == GIMPLE_PHI)
@@ -4587,10 +4595,6 @@ vect_finalize_reduction:
 
       phis.release ();
     }
-
-  scalar_results.release ();
-  inner_phis.release ();
-  new_phis.release ();
 }
 
 
@@ -4678,10 +4682,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   struct loop * def_stmt_loop, *outer_loop = NULL;
   tree def_arg;
   gimple def_arg_stmt;
-  vec<tree> vec_oprnds0 = vNULL;
-  vec<tree> vec_oprnds1 = vNULL;
-  vec<tree> vect_defs = vNULL;
-  vec<gimple> phis = vNULL;
+  auto_vec<tree> vec_oprnds0;
+  auto_vec<tree> vec_oprnds1;
+  auto_vec<tree> vect_defs;
+  auto_vec<gimple> phis;
   int vec_num;
   tree def0, def1, tem, op0, op1 = NULL_TREE;
 
@@ -5297,11 +5301,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
                                     epilog_reduc_code, phis, reduc_index,
                                     double_reduc, slp_node);
 
-  phis.release ();
-  vect_defs.release ();
-  vec_oprnds0.release ();
-  vec_oprnds1.release ();
-
   return true;
 }
 
@@ -5572,6 +5571,111 @@ vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
     }
 }
 
+
+/* This function builds ni_name = number of iterations.  Statements
+   are emitted on the loop preheader edge.  */
+
+static tree
+vect_build_loop_niters (loop_vec_info loop_vinfo)
+{
+  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
+  if (TREE_CODE (ni) == INTEGER_CST)
+    return ni;
+  else
+    {
+      tree ni_name, var;
+      gimple_seq stmts = NULL;
+      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+
+      var = create_tmp_var (TREE_TYPE (ni), "niters");
+      ni_name = force_gimple_operand (ni, &stmts, false, var);
+      if (stmts)
+       gsi_insert_seq_on_edge_immediate (pe, stmts);
+
+      return ni_name;
+    }
+}
+
+
+/* This function generates the following statements:
+
+   ni_name = number of iterations loop executes
+   ratio = ni_name / vf
+   ratio_mult_vf_name = ratio * vf
+
+   and places them on the loop preheader edge.  */
+
+static void
+vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
+                                tree ni_name,
+                                tree *ratio_mult_vf_name_ptr,
+                                tree *ratio_name_ptr)
+{
+  tree ni_minus_gap_name;
+  tree var;
+  tree ratio_name;
+  tree ratio_mult_vf_name;
+  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
+  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+  tree log_vf;
+
+  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
+
+  /* If epilogue loop is required because of data accesses with gaps, we
+     subtract one iteration from the total number of iterations here for
+     correct calculation of RATIO.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+    {
+      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+                                      ni_name,
+                                      build_one_cst (TREE_TYPE (ni_name)));
+      if (!is_gimple_val (ni_minus_gap_name))
+       {
+         var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+          gimple stmts = NULL;
+          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+                                                   true, var);
+         gsi_insert_seq_on_edge_immediate (pe, stmts);
+        }
+    }
+  else
+    ni_minus_gap_name = ni_name;
+
+  /* Create: ratio = ni >> log2(vf) */
+
+  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+                           ni_minus_gap_name, log_vf);
+  if (!is_gimple_val (ratio_name))
+    {
+      var = create_tmp_var (TREE_TYPE (ni), "bnd");
+      gimple stmts = NULL;
+      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
+      gsi_insert_seq_on_edge_immediate (pe, stmts);
+    }
+  *ratio_name_ptr = ratio_name;
+
+  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
+
+  if (ratio_mult_vf_name_ptr)
+    {
+      ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
+                                       ratio_name, log_vf);
+      if (!is_gimple_val (ratio_mult_vf_name))
+       {
+         var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
+         gimple stmts = NULL;
+         ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
+                                                    true, var);
+         gsi_insert_seq_on_edge_immediate (pe, stmts);
+       }
+      *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
+    }
+
+  return;
+}
+
+
 /* Function vect_transform_loop.
 
    The analysis phase has determined that the loop is vectorizable.
@@ -5635,13 +5739,20 @@ vect_transform_loop (loop_vec_info loop_vinfo)
       check_profitability = false;
     }
 
+  tree ni_name = vect_build_loop_niters (loop_vinfo);
+  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
+
   /* Peel the loop if there are data refs with unknown alignment.
      Only one data ref with unknown store is allowed.  */
 
-  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
     {
-      vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
+      vect_do_peeling_for_alignment (loop_vinfo, ni_name,
+                                    th, check_profitability);
       check_profitability = false;
+      /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
+        be re-computed.  */
+      ni_name = NULL_TREE;
     }
 
   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
@@ -5652,19 +5763,25 @@ vect_transform_loop (loop_vec_info loop_vinfo)
      will remain scalar and will compute the remaining (n%VF) iterations.
      (VF is the vectorization factor).  */
 
-  if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
-      < exact_log2 (vectorization_factor)
+  if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
-    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
-                                   th, check_profitability);
+    {
+      tree ratio_mult_vf;
+      if (!ni_name)
+       ni_name = vect_build_loop_niters (loop_vinfo);
+      vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
+                                      &ratio);
+      vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
+                                     th, check_profitability);
+    }
   else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
     ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
   else
     {
-      tree ni_name, ratio_mult_vf;
-      vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf,
-                                      &ratio, NULL);
+      if (!ni_name)
+       ni_name = vect_build_loop_niters (loop_vinfo);
+      vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
     }
 
   /* 1) Make sure the loop header has exactly two entries