static bool vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
-static void vect_compute_data_refs_alignment (loop_vec_info);
+static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
/* Main code transformation functions. */
static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
+static enum dr_alignment_support vect_supportable_dr_alignment
+ (struct data_reference *);
static void vect_align_data_ref (tree);
static void vect_enhance_data_refs_alignment (loop_vec_info);
"loop_write_datarefs");
VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
"loop_read_datarefs");
+ LOOP_VINFO_UNALIGNED_DR (res) = NULL;
- for (i=0; i<MAX_NUMBER_OF_UNALIGNED_DATA_REFS; i++)
- LOOP_UNALIGNED_DR (res, i) = NULL;
return res;
}
tree op;
tree vec_oprnd1;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
struct loop *loop = STMT_VINFO_LOOP (stmt_info);
enum machine_mode vec_mode;
tree dummy;
+ enum dr_alignment_support alignment_support_cheme;
/* Is vectorizable store? */
if (vect_debug_details (NULL))
fprintf (dump_file, "transform store");
+ alignment_support_cheme = vect_supportable_dr_alignment (dr);
+ gcc_assert (alignment_support_cheme);
+  gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
+
/* Handle use - get the vectorized def from the defining stmt. */
vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
basic_block new_bb;
struct loop *loop = STMT_VINFO_LOOP (stmt_info);
edge pe = loop_preheader_edge (loop);
- bool software_pipeline_loads_p = false;
+ enum dr_alignment_support alignment_support_cheme;
/* Is vectorizable load? */
return false;
}
- if (!aligned_access_p (dr))
- {
- if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
- && (!targetm.vectorize.builtin_mask_for_load
- || targetm.vectorize.builtin_mask_for_load ()))
- software_pipeline_loads_p = true;
- else if (!targetm.vectorize.misaligned_mem_ok (mode))
- {
- /* Possibly unaligned access, and can't software pipeline the loads.
- */
- if (vect_debug_details (loop))
- fprintf (dump_file, "Arbitrary load not supported.");
- return false;
- }
- }
-
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
if (vect_debug_details (NULL))
fprintf (dump_file, "transform load.");
- if (!software_pipeline_loads_p)
+ alignment_support_cheme = vect_supportable_dr_alignment (dr);
+ gcc_assert (alignment_support_cheme);
+
+ if (alignment_support_cheme == dr_aligned
+ || alignment_support_cheme == dr_unaligned_supported)
{
/* Create:
p = initial_addr;
TREE_OPERAND (new_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, new_stmt, bsi);
}
- else /* software-pipeline the loads */
+ else if (alignment_support_cheme == dr_unaligned_software_pipeline)
{
/* Create:
p1 = initial_addr;
TREE_OPERAND (new_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, new_stmt, bsi);
}
+ else
+ gcc_unreachable ();
*vec_stmt = new_stmt;
return true;
}
+/* Function vect_supportable_dr_alignment
+
+   Return whether the data reference DR is supported with respect to its
+   alignment:
+     dr_aligned                      - the access is known to be aligned;
+     dr_unaligned_software_pipeline  - a misaligned load can be handled by
+                                       the realignment scheme (realign-load
+                                       optab, plus the mask-for-load builtin
+                                       when the target defines one);
+     dr_unaligned_supported          - the target handles the misaligned
+                                       access directly;
+     dr_unaligned_unsupported        - none of the above (in particular,
+                                       any misaligned store, FORNOW).  */
+
+static enum dr_alignment_support
+vect_supportable_dr_alignment (struct data_reference *dr)
+{
+  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
+  enum machine_mode mode = (int) TYPE_MODE (vectype);
+
+  if (aligned_access_p (dr))
+    return dr_aligned;
+
+  /* Possibly unaligned access.  */
+
+  if (DR_IS_READ (dr))
+    {
+      /* Prefer the realignment scheme: it requires a realign-load insn
+         for this mode, and the mask-for-load builtin if the target
+         declares that hook.  */
+      if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
+	  && (!targetm.vectorize.builtin_mask_for_load
+	      || targetm.vectorize.builtin_mask_for_load ()))
+	return dr_unaligned_software_pipeline;
+
+      if (targetm.vectorize.misaligned_mem_ok (mode))
+	/* Can't software pipeline the loads.  */
+	return dr_unaligned_supported;
+    }
+
+  /* Unsupported.  */
+  return dr_unaligned_unsupported;
+}
+
+
/* Function vect_transform_stmt.
Create a vectorized stmt to replace STMT, and insert it at BSI. */
Set the number of iterations for the loop represented by LOOP_VINFO
to the minimum between NITERS (the original iteration count of the loop)
- and the misalignment DR - the first data reference in the list
- LOOP_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this
- loop, the data reference DR will refer to an aligned location. */
+ and the misalignment of DR - the first data reference recorded in
+ LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
+ this loop, the data reference DR will refer to an aligned location. */
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
{
- struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
+ struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree var, stmt;
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
vect_update_inits_of_dr (dr, loop, niters);
- DR_MISALIGNMENT (dr) = -1;
}
}
'niters' is set to the misalignment of one of the data references in the
loop, thereby forcing it to refer to an aligned location at the beginning
of the execution of this loop. The data reference for which we are
- peeling is chosen from LOOP_UNALIGNED_DR. */
+ peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree niters_of_prolog_loop, ni_name;
- struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
if (vect_debug_details (NULL))
fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
niters_of_prolog_loop, ni_name, false);
-
- /* Update stmt info of dr according to which we peeled. */
- DR_MISALIGNMENT (dr) = 0;
-
/* Update number of times loop executes. */
vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
#ifdef ENABLE_CHECKING
/* FORNOW: Verify that all stmts operate on the same number of
units and no inner unrolling is necessary. */
- gcc_assert (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
- == vectorization_factor);
+ gcc_assert
+ (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
+ == vectorization_factor);
#endif
/* -------- vectorize statement ------------ */
if (vect_debug_details (NULL))
/* It is not possible to vectorize this data reference. */
return false;
}
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
if (TREE_CODE (ref) == ARRAY_REF)
FOR NOW: No analysis is actually performed. Misalignment is calculated
only for trivial cases. TODO. */
-static void
+static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
- vect_compute_data_ref_alignment (dr, loop_vinfo);
+ if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
+ return false;
}
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
- vect_compute_data_ref_alignment (dr, loop_vinfo);
+ if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
+ return false;
}
+
+ return true;
}
FOR NOW: No transformation is actually performed. TODO. */
static void
-vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
+vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
+ varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
+ varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ unsigned int i;
+
/*
This pass will require a cost model to guide it whether to apply peeling
or versioning or a combination of the two. For example, the scheme that
(whether to generate regular loads/stores, or with special handling for
misalignment).
*/
+
+ /* (1) Peeling to force alignment. */
+
+ /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
+ Considerations:
+ + How many accesses will become aligned due to the peeling
+ - How many accesses will become unaligned due to the peeling,
+ and the cost of misaligned accesses.
+ - The cost of peeling (the extra runtime checks, the increase
+ in code size).
+
+ The scheme we use FORNOW: peel to force the alignment of the first
+     misaligned store in the loop.
+     Rationale: misaligned stores are not yet supported.
+
+ TODO: Use a better cost model. */
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
+ if (!aligned_access_p (dr))
+ {
+ LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
+ LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
+ break;
+ }
+ }
+
+ if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
+ {
+ if (vect_debug_details (loop))
+ fprintf (dump_file, "Peeling for alignment will not be applied.");
+ return;
+ }
+ else
+ if (vect_debug_details (loop))
+ fprintf (dump_file, "Peeling for alignment will be applied.");
+
+
+ /* (1.2) Update the alignment info according to the peeling factor.
+ If the misalignment of the DR we peel for is M, then the
+ peeling factor is VF - M, and the misalignment of each access DR_i
+ in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
+ If the misalignment of the DR we peel for is unknown, then the
+ misalignment of each access DR_i in the loop is also unknown.
+
+ FORNOW: set the misalignment of the accesses to unknown even
+ if the peeling factor is known at compile time.
+
+ TODO: - if the peeling factor is known at compile time, use that
+ when updating the misalignment info of the loop DRs.
+ - consider accesses that are known to have the same
+ alignment, even if that alignment is unknown. */
+
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
+ if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
+ DR_MISALIGNMENT (dr) = 0;
+ else
+ DR_MISALIGNMENT (dr) = -1;
+ }
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
+ {
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
+ if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
+ DR_MISALIGNMENT (dr) = 0;
+ else
+ DR_MISALIGNMENT (dr) = -1;
+ }
}
static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
+ varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
- /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
-
+ enum dr_alignment_support supportable_dr_alignment;
unsigned int i;
- unsigned int decide_peeling_count = 0;
if (vect_debug_details (NULL))
fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
/* This pass may take place at function granularity instead of at loop
granularity. */
- vect_compute_data_refs_alignment (loop_vinfo);
+ if (!vect_compute_data_refs_alignment (loop_vinfo))
+ {
+ if (vect_debug_details (loop) || vect_debug_stats (loop))
+ fprintf (dump_file,
+ "not vectorized: can't calculate alignment for data ref.");
+ return false;
+ }
- /* This pass will use loop versioning and loop peeling in order to enhance
- the alignment of data references in the loop.
- FOR NOW: we assume that whatever versioning/peeling took place, the
- original loop is to be vectorized. Any other loops that were created by
- the transformations performed in this pass - are not supposed to be
- vectorized. This restriction will be relaxed. */
+ /* This pass will decide on using loop versioning and/or loop peeling in
+ order to enhance the alignment of data references in the loop. */
vect_enhance_data_refs_alignment (loop_vinfo);
- /* Finally, check that loop can be vectorized.
- FOR NOW: Until support for misaligned stores is in place, only if all
- stores are aligned can the loop be vectorized. This restriction will be
- relaxed. In the meantime, we can force the alignment of on of the
- data-references in the loop using peeling. We currently use a heuristic
- that peels the first misaligned store, but we plan to develop a
- better cost model to guide the decision on which data-access to peel for.
- */
+ /* Finally, check that all the data references in the loop can be
+ handled with respect to their alignment. */
- for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
{
- struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
- if (!aligned_access_p (dr))
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr);
+ if (!supportable_dr_alignment)
{
- /* Decide here whether we need peeling for alignment. */
- decide_peeling_count++;
- if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS)
- {
- if (vect_debug_stats (loop) || vect_debug_details (loop))
- fprintf (dump_file,
- "not vectorized: multiple misaligned stores.");
- return false;
- }
- else
- {
- LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr;
- LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
- }
+ if (vect_debug_details (loop) || vect_debug_stats (loop))
+ fprintf (dump_file, "not vectorized: unsupported unaligned load.");
+ return false;
}
}
-
- /* The vectorizer now supports misaligned loads, so we don't fail anymore
- in the presence of a misaligned read dataref. For some targets however
- it may be preferable not to vectorize in such a case as misaligned
- accesses are very costly. This should be considered in the future. */
-/*
- for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
{
- struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
- if (!aligned_access_p (dr))
+ struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr);
+ if (!supportable_dr_alignment)
{
- if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
- || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
- fprintf (dump_file, "not vectorized: unaligned load.");
+ if (vect_debug_details (loop) || vect_debug_stats (loop))
+ fprintf (dump_file, "not vectorized: unsupported unaligned store.");
return false;
}
}
-*/
return true;
}