unsigned int mode_i = 0;
unsigned HOST_WIDE_INT simdlen = loop->simdlen;
+ /* Keep track of the VF for each mode. Initialize all to 0 which indicates
+ a mode has not been analyzed. */
+ auto_vec<poly_uint64, 8> cached_vf_per_mode;
+ for (unsigned i = 0; i < vector_modes.length (); ++i)
+ cached_vf_per_mode.safe_push (0);
+
/* First determine the main loop vectorization mode, either the first
one that works, starting with auto-detecting the vector mode and then
following the targets order of preference, or the one with the
while (1)
{
bool fatal;
+ unsigned int last_mode_i = mode_i;
+ /* Set cached VF to -1 prior to analysis, which indicates a mode has
+ failed. */
+ cached_vf_per_mode[last_mode_i] = -1;
opt_loop_vec_info loop_vinfo
= vect_analyze_loop_1 (loop, shared, &loop_form_info,
NULL, vector_modes, mode_i,
if (loop_vinfo)
{
+ /* Analyzis has been successful so update the VF value. The
+ VF should always be a multiple of unroll_factor and we want to
+ capture the original VF here. */
+ cached_vf_per_mode[last_mode_i]
+ = exact_div (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ loop_vinfo->suggested_unroll_factor);
/* Once we hit the desired simdlen for the first time,
discard any previous attempts. */
if (simdlen
{
/* If the target does not support partial vectors we can shorten the
number of modes to analyze for the epilogue as we know we can't pick a
- mode that has at least as many NUNITS as the main loop's vectorization
- factor, since that would imply the epilogue's vectorization factor
- would be at least as high as the main loop's and we would be
- vectorizing for more scalar iterations than there would be left. */
+ mode that would lead to a VF at least as big as the
+ FIRST_VINFO_VF. */
if (!supports_partial_vectors
- && maybe_ge (GET_MODE_NUNITS (vector_modes[mode_i]), first_vinfo_vf))
+ && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf))
{
mode_i++;
if (mode_i == vector_modes.length ())