#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
-#include "ggc.h"
#include "tree.h"
+#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
+#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}
+
/* Function vect_get_loop_niters.
- Determine how many iterations the loop is executed.
- If an expression that represents the number of iterations
- can be constructed, place it in NUMBER_OF_ITERATIONS.
+ Determine how many iterations the loop is executed and place it
+ in NUMBER_OF_ITERATIONS.
+
Return the loop exit condition. */
static gimple
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"=== get_loop_niters ===\n");
- niters = number_of_exit_cond_executions (loop);
- if (niters != NULL_TREE
- && niters != chrec_dont_know)
- {
- *number_of_iterations = niters;
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
- dump_printf (MSG_NOTE, "\n");
- }
- }
+ niters = number_of_latch_executions (loop);
+ /* We want the number of loop header executions which is the number
+ of latch executions plus one.
+ ??? For UINT_MAX latch executions this number overflows to zero
+ for loops like do { n++; } while (n != 0); */
+ if (niters && !chrec_contains_undetermined (niters))
+ niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
+ build_int_cst (TREE_TYPE (niters), 1));
+ *number_of_iterations = niters;
return get_loop_exit_condition (loop);
}
LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
- LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
+ LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0;
LOOP_VINFO_LOOP_NEST (res).create (3);
LOOP_VINFO_DATAREFS (res).create (10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
+ LOOP_VINFO_PEELING_FOR_NITER (res) = false;
LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
return res;
}
if (empty_block_p (loop->header))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: empty loop.\n");
- return NULL;
- }
+ return NULL;
+ }
}
else
{
return NULL;
}
- if (!number_of_iterations)
+ if (!number_of_iterations
+ || chrec_contains_undetermined (number_of_iterations))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return NULL;
}
- if (chrec_contains_undetermined (number_of_iterations))
+ if (integer_zerop (number_of_iterations))
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Infinite number of iterations.\n");
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: number of iterations = 0.\n");
if (inner_loop_vinfo)
- destroy_loop_vec_info (inner_loop_vinfo, true);
+ destroy_loop_vec_info (inner_loop_vinfo, true);
return NULL;
}
- if (!NITERS_KNOWN_P (number_of_iterations))
+ loop_vinfo = new_loop_vec_info (loop);
+ LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
+ LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
+
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
if (dump_enabled_p ())
{
dump_printf (MSG_NOTE, "\n");
}
}
- else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: number of iterations = 0.\n");
- if (inner_loop_vinfo)
- destroy_loop_vec_info (inner_loop_vinfo, true);
- return NULL;
- }
-
- loop_vinfo = new_loop_vec_info (loop);
- LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
- LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
return false;
}
- if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
- || ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
- < exact_log2 (vectorization_factor)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required.\n");
- if (!vect_can_advance_ivs_p (loop_vinfo))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: can't create epilog loop 1.\n");
- return false;
- }
- if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: can't create epilog loop 2.\n");
- return false;
- }
- }
-
return true;
}
return false;
}
+ /* Decide whether we need to create an epilogue loop to handle
+ remaining scalar iterations. */
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+ {
+ if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+ }
+ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+ || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
+ < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
+ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
+
+ /* If an epilogue loop is required make sure we can create one. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
+ if (!vect_can_advance_ivs_p (loop_vinfo)
+ || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+ single_exit (LOOP_VINFO_LOOP
+ (loop_vinfo))))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: can't create required "
+ "epilog loop\n");
+ return false;
+ }
+ }
+
return true;
}
int scalar_single_iter_cost = 0;
int scalar_outside_cost = 0;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
else
{
/* Cost model check occurs at prologue generation. */
- if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
+ vect_get_stmt_cost (cond_branch_not_taken);
/* Cost model check occurs at epilogue generation. */
}
else
{
- int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
tree bitsize =
TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
- int element_bitsize = tree_low_cst (bitsize, 1);
+ int element_bitsize = tree_to_uhwi (bitsize);
int nelements = vec_size_in_bits / element_bitsize;
optab = optab_for_tree_code (code, vectype, optab_default);
bool extract_scalar_result = false;
gimple use_stmt, orig_stmt, reduction_phi = NULL;
bool nested_in_vect_loop = false;
- vec<gimple> new_phis = vNULL;
- vec<gimple> inner_phis = vNULL;
+ auto_vec<gimple> new_phis;
+ auto_vec<gimple> inner_phis;
enum vect_def_type dt = vect_unknown_def_type;
int j, i;
- vec<tree> scalar_results = vNULL;
+ auto_vec<tree> scalar_results;
unsigned int group_size = 1, k, ratio;
- vec<tree> vec_initial_defs = vNULL;
- vec<gimple> phis;
+ auto_vec<tree> vec_initial_defs;
+ auto_vec<gimple> phis;
bool slp_reduc = false;
tree new_phi_result;
gimple inner_phi = NULL;
}
}
- vec_initial_defs.release ();
-
/* 2. Create epilog code.
The reduction epilog code operates across the elements of the vector
of partial results computed by the vectorized loop.
enum tree_code shift_code = ERROR_MARK;
bool have_whole_vector_shift = true;
int bit_offset;
- int element_bitsize = tree_low_cst (bitsize, 1);
- int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ int element_bitsize = tree_to_uhwi (bitsize);
+ int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
tree vec_temp;
if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
dump_printf_loc (MSG_NOTE, vect_location,
"Reduce using scalar code.\n");
- vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
FOR_EACH_VEC_ELT (new_phis, i, new_phi)
{
if (gimple_code (new_phi) == GIMPLE_PHI)
phis.release ();
}
-
- scalar_results.release ();
- inner_phis.release ();
- new_phis.release ();
}
struct loop * def_stmt_loop, *outer_loop = NULL;
tree def_arg;
gimple def_arg_stmt;
- vec<tree> vec_oprnds0 = vNULL;
- vec<tree> vec_oprnds1 = vNULL;
- vec<tree> vect_defs = vNULL;
- vec<gimple> phis = vNULL;
+ auto_vec<tree> vec_oprnds0;
+ auto_vec<tree> vec_oprnds1;
+ auto_vec<tree> vect_defs;
+ auto_vec<gimple> phis;
int vec_num;
tree def0, def1, tem, op0, op1 = NULL_TREE;
epilog_reduc_code, phis, reduc_index,
double_reduc, slp_node);
- phis.release ();
- vect_defs.release ();
- vec_oprnds0.release ();
- vec_oprnds1.release ();
-
return true;
}
}
}
+
+/* This function builds ni_name = number of iterations.  Statements
+   are emitted on the loop preheader edge.  If NITERS is already an
+   INTEGER_CST no code is emitted and the (unshared) constant is
+   returned directly; otherwise the expression is gimplified into a
+   fresh "niters" temporary and that SSA value is returned.  */
+
+static tree
+vect_build_loop_niters (loop_vec_info loop_vinfo)
+{
+  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
+  if (TREE_CODE (ni) == INTEGER_CST)
+    return ni;
+  else
+    {
+      tree ni_name, var;
+      gimple_seq stmts = NULL;
+      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+
+      var = create_tmp_var (TREE_TYPE (ni), "niters");
+      ni_name = force_gimple_operand (ni, &stmts, false, var);
+      /* Any statements needed to compute the value are placed on the
+	 preheader edge so they execute exactly once before the loop.  */
+      if (stmts)
+	gsi_insert_seq_on_edge_immediate (pe, stmts);
+
+      return ni_name;
+    }
+}
+
+
+/* This function generates the following statements:
+
+   ni_name = number of iterations loop executes
+   ratio = ni_name / vf
+   ratio_mult_vf_name = ratio * vf
+
+   and places them on the loop preheader edge.
+
+   NI_NAME is the gimple value of the number of iterations (see
+   vect_build_loop_niters).  *RATIO_NAME_PTR receives RATIO; if
+   RATIO_MULT_VF_NAME_PTR is non-NULL it receives RATIO * VF.  VF is
+   required to be a power of two so the divisions become shifts.  */
+
+static void
+vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
+				 tree ni_name,
+				 tree *ratio_mult_vf_name_ptr,
+				 tree *ratio_name_ptr)
+{
+  tree ni_minus_gap_name;
+  tree var;
+  tree ratio_name;
+  tree ratio_mult_vf_name;
+  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
+  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+  tree log_vf;
+
+  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
+
+  /* If epilogue loop is required because of data accesses with gaps, we
+     subtract one iteration from the total number of iterations here for
+     correct calculation of RATIO.  */
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+    {
+      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
+				       ni_name,
+				       build_one_cst (TREE_TYPE (ni_name)));
+      if (!is_gimple_val (ni_minus_gap_name))
+	{
+	  var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
+	  gimple_seq stmts = NULL;
+	  ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
+						    true, var);
+	  gsi_insert_seq_on_edge_immediate (pe, stmts);
+	}
+    }
+  else
+    ni_minus_gap_name = ni_name;
+
+  /* Create: ratio = ni >> log2(vf).  */
+
+  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
+			    ni_minus_gap_name, log_vf);
+  if (!is_gimple_val (ratio_name))
+    {
+      var = create_tmp_var (TREE_TYPE (ni), "bnd");
+      gimple_seq stmts = NULL;
+      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
+      gsi_insert_seq_on_edge_immediate (pe, stmts);
+    }
+  *ratio_name_ptr = ratio_name;
+
+  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
+
+  if (ratio_mult_vf_name_ptr)
+    {
+      ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
+					ratio_name, log_vf);
+      if (!is_gimple_val (ratio_mult_vf_name))
+	{
+	  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
+	  gimple_seq stmts = NULL;
+	  ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
+						     true, var);
+	  gsi_insert_seq_on_edge_immediate (pe, stmts);
+	}
+      *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
+    }
+}
+
+
/* Function vect_transform_loop.
The analysis phase has determined that the loop is vectorizable.
check_profitability = false;
}
+ tree ni_name = vect_build_loop_niters (loop_vinfo);
+ LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
+
/* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */
- if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
{
- vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
+ vect_do_peeling_for_alignment (loop_vinfo, ni_name,
+ th, check_profitability);
check_profitability = false;
+ /* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
+ be re-computed. */
+ ni_name = NULL_TREE;
}
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
will remain scalar and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */
- if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
- < exact_log2 (vectorization_factor)
+ if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
- vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
- th, check_profitability);
+ {
+ tree ratio_mult_vf;
+ if (!ni_name)
+ ni_name = vect_build_loop_niters (loop_vinfo);
+ vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
+ &ratio);
+ vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
+ th, check_profitability);
+ }
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
else
{
- tree ni_name, ratio_mult_vf;
- vect_generate_tmps_on_preheader (loop_vinfo, &ni_name, &ratio_mult_vf,
- &ratio, NULL);
+ if (!ni_name)
+ ni_name = vect_build_loop_niters (loop_vinfo);
+ vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
}
/* 1) Make sure the loop header has exactly two entries