From f27848a5dc4d3b16cd4112bddcb59e0916eba623 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Wed, 27 Apr 2022 14:06:12 +0200
Subject: [PATCH] tree-optimization/105219 - bogus max iters for vectorized epilogue

The following makes sure to take into account prologue peeling
when trying to narrow down the maximum number of iterations
computed for the vectorized epilogue.  A similar issue exists when
peeling for gaps.

2022-04-27  Richard Biener

	PR tree-optimization/105219
	* tree-vect-loop.cc (vect_transform_loop): Disable
	special code narrowing the vectorized epilogue max
	iterations when peeling for alignment or gaps was in effect.

	* gcc.dg/vect/pr105219.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr105219.c | 29 +++++++++++++++++++++++++++++
 gcc/tree-vect-loop.cc                |  6 +++++-
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr105219.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr105219.c b/gcc/testsuite/gcc.dg/vect/pr105219.c
new file mode 100644
index 0000000..0cb7ae2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr105219.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-mtune=intel" { target x86_64-*-* i?86-*-* } } */
+
+#include "tree-vect.h"
+
+int data[128];
+
+void __attribute((noipa))
+foo (int *data, int n)
+{
+  for (int i = 0; i < n; ++i)
+    data[i] = i;
+}
+
+int main()
+{
+  check_vect ();
+  for (int start = 0; start < 16; ++start)
+    for (int n = 1; n < 3*16; ++n)
+      {
+        __builtin_memset (data, 0, sizeof (data));
+        foo (&data[start], n);
+        for (int j = 0; j < n; ++j)
+          if (data[start + j] != j)
+            __builtin_abort ();
+      }
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index d7bc346..f53a634 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -9977,7 +9977,11 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
                           lowest_vf) - 1
            : wi::udiv_floor (loop->nb_iterations_upper_bound + bias_for_lowest,
                              lowest_vf) - 1);
-      if (main_vinfo)
+      if (main_vinfo
+          /* Both peeling for alignment and peeling for gaps can end up
+             with the scalar epilogue running for more than VF-1 iterations. */
+          && !main_vinfo->peeling_for_alignment
+          && !main_vinfo->peeling_for_gaps)
         {
           unsigned int bound;
           poly_uint64 main_iters
-- 
2.7.4