2013-11-21 Richard Biener <rguenther@suse.de>
author rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 21 Nov 2013 14:09:15 +0000 (14:09 +0000)
committer rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 21 Nov 2013 14:09:15 +0000 (14:09 +0000)
PR tree-optimization/59058
* tree-scalar-evolution.h (number_of_exit_cond_executions): Remove.
* tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise.
* tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ...
(LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this.
(NITERS_KNOWN_P): Fold into ...
(LOOP_VINFO_NITERS_KNOWN_P): ... this.
(LOOP_VINFO_PEELING_FOR_NITER): Add.
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Re-use precomputed niter
instead of re-emitting it.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
* tree-vect-loop.c (vect_get_loop_niters): Use
number_of_latch_executions.
(new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER.
(vect_analyze_loop_form): Simplify.
(vect_analyze_loop_operations): Move epilogue peeling code ...
(vect_analyze_loop_2): ... here and adjust it to compute
LOOP_VINFO_PEELING_FOR_NITER.
(vect_estimate_min_profitable_iters): Use
LOOP_VINFO_PEELING_FOR_ALIGNMENT.
(vect_build_loop_niters): Emit on the preheader.
(vect_generate_tmps_on_preheader): Likewise.
(vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead
of recomputing it.  Adjust.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@205217 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/tree-scalar-evolution.c
gcc/tree-scalar-evolution.h
gcc/tree-vect-data-refs.c
gcc/tree-vect-loop-manip.c
gcc/tree-vect-loop.c
gcc/tree-vectorizer.h

index 766421d..7472098 100644 (file)
@@ -1,5 +1,35 @@
 2013-11-21  Richard Biener  <rguenther@suse.de>
 
+       PR tree-optimization/59058
+       * tree-scalar-evolution.h (number_of_exit_cond_executions): Remove.
+       * tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise.
+       * tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ...
+       (LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this.
+       (NITERS_KNOWN_P): Fold into ...
+       (LOOP_VINFO_NITERS_KNOWN_P): ... this.
+       (LOOP_VINFO_PEELING_FOR_NITER): Add.
+       * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
+       Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
+       (vect_do_peeling_for_alignment): Re-use precomputed niter
+       instead of re-emitting it.
+       * tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
+       Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
+       * tree-vect-loop.c (vect_get_loop_niters): Use
+       number_of_latch_executions.
+       (new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER.
+       (vect_analyze_loop_form): Simplify.
+       (vect_analyze_loop_operations): Move epilogue peeling code ...
+       (vect_analyze_loop_2): ... here and adjust it to compute
+       LOOP_VINFO_PEELING_FOR_NITER.
+       (vect_estimate_min_profitable_iters): Use
+       LOOP_VINFO_PEELING_FOR_ALIGNMENT.
+       (vect_build_loop_niters): Emit on the preheader.
+       (vect_generate_tmps_on_preheader): Likewise.
+       (vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead
+       of recomputing it.  Adjust.
+
+2013-11-21  Richard Biener  <rguenther@suse.de>
+
        * tree-vectorizer.h (LOC, UNKNOWN_LOC, EXPR_LOC, LOC_FILE,
        LOC_LINE): Remove wrappers and fix all users.
        (struct _loop_vec_info): Remove loop_line_number member.
index 0c1f1df..de05f64 100644 (file)
@@ -2910,34 +2910,6 @@ number_of_latch_executions (struct loop *loop)
   loop->nb_iterations = res;
   return res;
 }
-
-/* Returns the number of executions of the exit condition of LOOP,
-   i.e., the number by one higher than number_of_latch_executions.
-   Note that unlike number_of_latch_executions, this number does
-   not necessarily fit in the unsigned variant of the type of
-   the control variable -- if the number of iterations is a constant,
-   we return chrec_dont_know if adding one to number_of_latch_executions
-   overflows; however, in case the number of iterations is symbolic
-   expression, the caller is responsible for dealing with this
-   the possible overflow.  */
-
-tree
-number_of_exit_cond_executions (struct loop *loop)
-{
-  tree ret = number_of_latch_executions (loop);
-  tree type = chrec_type (ret);
-
-  if (chrec_contains_undetermined (ret))
-    return ret;
-
-  ret = chrec_fold_plus (type, ret, build_int_cst (type, 1));
-  if (TREE_CODE (ret) == INTEGER_CST
-      && TREE_OVERFLOW (ret))
-    return chrec_dont_know;
-
-  return ret;
-}
-
 \f
 
 /* Counters for the stats.  */
index fc87251..3a65691 100644 (file)
@@ -22,7 +22,6 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_TREE_SCALAR_EVOLUTION_H
 
 extern tree number_of_latch_executions (struct loop *);
-extern tree number_of_exit_cond_executions (struct loop *);
 extern gimple get_loop_exit_condition (const struct loop *);
 
 extern void scev_initialize (void);
index 42b0d22..c1eb455 100644 (file)
@@ -1735,9 +1735,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
           if (npeel)
-            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
+            LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
           else
-            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
+            LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+             = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (dump_enabled_p ())
             {
index f5192e8..289e852 100644 (file)
@@ -1736,16 +1736,16 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int
 
   pe = loop_preheader_edge (loop);
 
-  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
     {
-      int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
 
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
                          "known peeling = %d.\n", npeel);
 
       iters = build_int_cst (niters_type, npeel);
-      *bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
     }
   else
     {
@@ -1876,7 +1876,6 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree niters_of_prolog_loop;
-  tree n_iters;
   tree wide_prolog_niters;
   struct loop *new_loop;
   int max_iter;
@@ -1918,9 +1917,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
                "loop to %d\n", max_iter);
 
   /* Update number of times loop executes.  */
-  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
   LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
-               TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
+               TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
 
   if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
     wide_prolog_niters = niters_of_prolog_loop;
index 36393da..02aa090 100644 (file)
@@ -771,11 +771,12 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
     vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 }
 
+
 /* Function vect_get_loop_niters.
 
-   Determine how many iterations the loop is executed.
-   If an expression that represents the number of iterations
-   can be constructed, place it in NUMBER_OF_ITERATIONS.
+   Determine how many iterations the loop is executed and place it
+   in NUMBER_OF_ITERATIONS.
+
    Return the loop exit condition.  */
 
 static gimple
@@ -786,20 +787,16 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                     "=== get_loop_niters ===\n");
-  niters = number_of_exit_cond_executions (loop);
 
-  if (niters != NULL_TREE
-      && niters != chrec_dont_know)
-    {
-      *number_of_iterations = niters;
-
-      if (dump_enabled_p ())
-        {
-          dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
-          dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
-          dump_printf (MSG_NOTE, "\n");
-        }
-    }
+  niters = number_of_latch_executions (loop);
+  /* We want the number of loop header executions which is the number
+     of latch executions plus one.
+     ???  For UINT_MAX latch executions this number overflows to zero
+     for loops like do { n++; } while (n != 0);  */
+  if (niters && !chrec_contains_undetermined (niters))
+    niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
+                         build_int_cst (TREE_TYPE (niters), 1));
+  *number_of_iterations = niters;
 
   return get_loop_exit_condition (loop);
 }
@@ -907,7 +904,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
-  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
+  LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
   LOOP_VINFO_VECT_FACTOR (res) = 0;
   LOOP_VINFO_LOOP_NEST (res).create (3);
   LOOP_VINFO_DATAREFS (res).create (10);
@@ -924,6 +921,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
   LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
   LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
+  LOOP_VINFO_PEELING_FOR_NITER (res) = false;
   LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
 
   return res;
@@ -1091,12 +1089,12 @@ vect_analyze_loop_form (struct loop *loop)
         }
 
       if (empty_block_p (loop->header))
-    {
-          if (dump_enabled_p ())
-            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: empty loop.\n");
-      return NULL;
-    }
+         return NULL;
+       }
     }
   else
     {
@@ -1243,7 +1241,8 @@ vect_analyze_loop_form (struct loop *loop)
       return NULL;
     }
 
-  if (!number_of_iterations)
+  if (!number_of_iterations
+      || chrec_contains_undetermined (number_of_iterations))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1254,17 +1253,21 @@ vect_analyze_loop_form (struct loop *loop)
       return NULL;
     }
 
-  if (chrec_contains_undetermined (number_of_iterations))
+  if (integer_zerop (number_of_iterations))
     {
       if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "Infinite number of iterations.\n");
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "not vectorized: number of iterations = 0.\n");
       if (inner_loop_vinfo)
-       destroy_loop_vec_info (inner_loop_vinfo, true);
+        destroy_loop_vec_info (inner_loop_vinfo, true);
       return NULL;
     }
 
-  if (!NITERS_KNOWN_P (number_of_iterations))
+  loop_vinfo = new_loop_vec_info (loop);
+  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
+  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
+
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
     {
       if (dump_enabled_p ())
         {
@@ -1274,19 +1277,6 @@ vect_analyze_loop_form (struct loop *loop)
           dump_printf (MSG_NOTE, "\n");
         }
     }
-  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "not vectorized: number of iterations = 0.\n");
-      if (inner_loop_vinfo)
-        destroy_loop_vec_info (inner_loop_vinfo, true);
-      return NULL;
-    }
-
-  loop_vinfo = new_loop_vec_info (loop);
-  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
-  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
 
   STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
 
@@ -1588,23 +1578,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
       return false;
     }
 
-  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-      || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
-         < exact_log2 (vectorization_factor)))
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
-      if (!vect_can_advance_ivs_p (loop_vinfo)
-         || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
-        {
-          if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "not vectorized: can't create required "
-                            "epilog loop\n");
-          return false;
-        }
-    }
-
   return true;
 }
 
@@ -1760,6 +1733,40 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
       return false;
     }
 
+  /* Decide whether we need to create an epilogue loop to handle
+     remaining scalar iterations.  */
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+    {
+      if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
+                  - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+         < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+       LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+    }
+  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+          || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+              < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+
+  /* If an epilogue loop is required make sure we can create one.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+      || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
+    {
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
+      if (!vect_can_advance_ivs_p (loop_vinfo)
+         || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+                                          single_exit (LOOP_VINFO_LOOP
+                                                        (loop_vinfo))))
+        {
+          if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "not vectorized: can't create required "
+                            "epilog loop\n");
+          return false;
+        }
+    }
+
   return true;
 }
 
@@ -2689,7 +2696,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
   int scalar_single_iter_cost = 0;
   int scalar_outside_cost = 0;
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+  int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
   void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
 
   /* Cost model disabled.  */
@@ -2880,7 +2887,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
       else
        {
          /* Cost model check occurs at prologue generation.  */
-         if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+         if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
            scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
              + vect_get_stmt_cost (cond_branch_not_taken); 
          /* Cost model check occurs at epilogue generation.  */
@@ -5574,47 +5581,51 @@ vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
 
 
 /* This function builds ni_name = number of iterations.  Statements
-   are queued onto SEQ.  */
+   are emitted on the loop preheader edge.  */
 
 static tree
-vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq)
+vect_build_loop_niters (loop_vec_info loop_vinfo)
 {
-  tree ni_name, var;
-  gimple_seq stmts = NULL;
   tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
+  if (TREE_CODE (ni) == INTEGER_CST)
+    return ni;
+  else
+    {
+      tree ni_name, var;
+      gimple_seq stmts = NULL;
+      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
 
-  var = create_tmp_var (TREE_TYPE (ni), "niters");
-  ni_name = force_gimple_operand (ni, &stmts, false, var);
-
-  if (stmts)
-    gimple_seq_add_seq (seq, stmts);
+      var = create_tmp_var (TREE_TYPE (ni), "niters");
+      ni_name = force_gimple_operand (ni, &stmts, false, var);
+      if (stmts)
+       gsi_insert_seq_on_edge_immediate (pe, stmts);
 
-  return ni_name;
+      return ni_name;
+    }
 }
 
 
 /* This function generates the following statements:
 
- ni_name = number of iterations loop executes
- ratio = ni_name / vf
- ratio_mult_vf_name = ratio * vf
+   ni_name = number of iterations loop executes
+   ratio = ni_name / vf
+   ratio_mult_vf_name = ratio * vf
 
- and places them in COND_EXPR_STMT_LIST.  */
+   and places them on the loop preheader edge.  */
 
 static void
 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
                                 tree ni_name,
                                 tree *ratio_mult_vf_name_ptr,
-                                tree *ratio_name_ptr,
-                                gimple_seq *cond_expr_stmt_list)
+                                tree *ratio_name_ptr)
 {
-  gimple_seq stmts;
   tree ni_minus_gap_name;
   tree var;
   tree ratio_name;
   tree ratio_mult_vf_name;
   tree ni = LOOP_VINFO_NITERS (loop_vinfo);
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
   tree log_vf;
 
   log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
@@ -5630,11 +5641,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
       if (!is_gimple_val (ni_minus_gap_name))
        {
          var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
-
-          stmts = NULL;
+          gimple stmts = NULL;
           ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
                                                    true, var);
-         gimple_seq_add_seq (cond_expr_stmt_list, stmts);
+         gsi_insert_seq_on_edge_immediate (pe, stmts);
         }
     }
   else
@@ -5647,10 +5657,9 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
   if (!is_gimple_val (ratio_name))
     {
       var = create_tmp_var (TREE_TYPE (ni), "bnd");
-
-      stmts = NULL;
+      gimple stmts = NULL;
       ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
-      gimple_seq_add_seq (cond_expr_stmt_list, stmts);
+      gsi_insert_seq_on_edge_immediate (pe, stmts);
     }
   *ratio_name_ptr = ratio_name;
 
@@ -5663,11 +5672,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
       if (!is_gimple_val (ratio_mult_vf_name))
        {
          var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
-
-         stmts = NULL;
+         gimple stmts = NULL;
          ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
                                                     true, var);
-         gimple_seq_add_seq (cond_expr_stmt_list, stmts);
+         gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
       *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
     }
@@ -5739,20 +5747,20 @@ vect_transform_loop (loop_vec_info loop_vinfo)
       check_profitability = false;
     }
 
+  tree ni_name = vect_build_loop_niters (loop_vinfo);
+  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
+
   /* Peel the loop if there are data refs with unknown alignment.
-     Only one data ref with unknown store is allowed.
-     This clobbers LOOP_VINFO_NITERS but retains the original
-     in LOOP_VINFO_NITERS_UNCHANGED.  So we cannot avoid re-computing
-     niters.  */
+     Only one data ref with unknown store is allowed.  */
 
-  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
     {
-      gimple_seq stmts = NULL;
-      tree ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
-      gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
       vect_do_peeling_for_alignment (loop_vinfo, ni_name,
                                     th, check_profitability);
       check_profitability = false;
+      /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
+        be re-computed.  */
+      ni_name = NULL_TREE;
     }
 
   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
@@ -5763,16 +5771,14 @@ vect_transform_loop (loop_vec_info loop_vinfo)
      will remain scalar and will compute the remaining (n%VF) iterations.
      (VF is the vectorization factor).  */
 
-  if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
-      < exact_log2 (vectorization_factor)
+  if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
       || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
     {
-      tree ni_name, ratio_mult_vf;
-      gimple_seq stmts = NULL;
-      ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
+      tree ratio_mult_vf;
+      if (!ni_name)
+       ni_name = vect_build_loop_niters (loop_vinfo);
       vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
-                                      &ratio, &stmts);
-      gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+                                      &ratio);
       vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
                                      th, check_profitability);
     }
@@ -5781,12 +5787,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
   else
     {
-      tree ni_name;
-      gimple_seq stmts = NULL;
-      ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
-      vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL,
-                                      &ratio, &stmts);
-      gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+      if (!ni_name)
+       ni_name = vect_build_loop_niters (loop_vinfo);
+      vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
     }
 
   /* 1) Make sure the loop header has exactly two entries
index 23c4d88..58884f8 100644 (file)
@@ -361,7 +361,7 @@ typedef struct _loop_vec_info {
 #define LOOP_VINFO_DATAREFS(L)             (L)->datarefs
 #define LOOP_VINFO_DDRS(L)                 (L)->ddrs
 #define LOOP_VINFO_INT_NITERS(L)           (TREE_INT_CST_LOW ((L)->num_iters))
-#define LOOP_PEELING_FOR_ALIGNMENT(L)      (L)->peeling_for_alignment
+#define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
 #define LOOP_VINFO_UNALIGNED_DR(L)         (L)->unaligned_dr
 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts
 #define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs
@@ -375,18 +375,15 @@ typedef struct _loop_vec_info {
 #define LOOP_VINFO_TARGET_COST_DATA(L)     (L)->target_cost_data
 #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 #define LOOP_VINFO_OPERANDS_SWAPPED(L)     (L)->operands_swapped
+#define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
 
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
-(L)->may_misalign_stmts.length () > 0
+  (L)->may_misalign_stmts.length () > 0
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)     \
-(L)->may_alias_ddrs.length () > 0
-
-#define NITERS_KNOWN_P(n)                     \
-(tree_fits_shwi_p ((n))                        \
-&& tree_to_shwi ((n)) > 0)
+  (L)->may_alias_ddrs.length () > 0
 
 #define LOOP_VINFO_NITERS_KNOWN_P(L)          \
-NITERS_KNOWN_P ((L)->num_iters)
+  (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
 
 static inline loop_vec_info
 loop_vec_info_for_loop (struct loop *loop)