From f2ebf2d98efe0ac2314b58cf474f44cb8ebd5244 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 13 Apr 2022 17:53:54 +0100 Subject: [PATCH] aarch64: Make sure the UF divides the VF [PR105254] In this PR, we were trying to set the unroll factor to a value higher than the minimum VF (or more specifically, to a value that doesn't divide the VF). I guess there are two approaches to this: let the target pick any value it likes and make target-independent code pare it back to something that makes sense, or require targets to supply sensible values from the outset. This patch goes for the latter approach. gcc/ PR tree-optimization/105254 * config/aarch64/aarch64.cc (aarch64_vector_costs::determine_suggested_unroll_factor): Take a loop_vec_info as argument. Restrict the unroll factor to values that divide the VF. (aarch64_vector_costs::finish_cost): Update call accordingly. gcc/testsuite/ PR tree-optimization/105254 * g++.dg/vect/pr105254.cc: New test. --- gcc/config/aarch64/aarch64.cc | 12 ++++++++---- gcc/testsuite/g++.dg/vect/pr105254.cc | 26 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/g++.dg/vect/pr105254.cc diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index cf62bdd..f650abb 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -15637,7 +15637,7 @@ private: unsigned int adjust_body_cost (loop_vec_info, const aarch64_vector_costs *, unsigned int); bool prefer_unrolled_loop () const; - unsigned int determine_suggested_unroll_factor (); + unsigned int determine_suggested_unroll_factor (loop_vec_info); /* True if we have performed one-time initialization based on the vec_info. 
*/ @@ -16746,7 +16746,8 @@ adjust_body_cost_sve (const aarch64_vec_op_count *ops, } unsigned int -aarch64_vector_costs::determine_suggested_unroll_factor () +aarch64_vector_costs:: +determine_suggested_unroll_factor (loop_vec_info loop_vinfo) { bool sve = m_vec_flags & VEC_ANY_SVE; /* If we are trying to unroll an Advanced SIMD main loop that contains @@ -16760,6 +16761,7 @@ aarch64_vector_costs::determine_suggested_unroll_factor () return 1; unsigned int max_unroll_factor = 1; + auto vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); for (auto vec_ops : m_ops) { aarch64_simd_vec_issue_info const *vec_issue @@ -16768,7 +16770,8 @@ aarch64_vector_costs::determine_suggested_unroll_factor () return 1; /* Limit unroll factor to a value adjustable by the user, the default value is 4. */ - unsigned int unroll_factor = aarch64_vect_unroll_limit; + unsigned int unroll_factor = MIN (aarch64_vect_unroll_limit, + (int) known_alignment (vf)); unsigned int factor = vec_ops.reduction_latency > 1 ? vec_ops.reduction_latency : 1; unsigned int temp; @@ -16946,7 +16949,8 @@ aarch64_vector_costs::finish_cost (const vector_costs *uncast_scalar_costs) { m_costs[vect_body] = adjust_body_cost (loop_vinfo, scalar_costs, m_costs[vect_body]); - m_suggested_unroll_factor = determine_suggested_unroll_factor (); + m_suggested_unroll_factor + = determine_suggested_unroll_factor (loop_vinfo); } /* Apply the heuristic described above m_stp_sequence_cost. 
Prefer diff --git a/gcc/testsuite/g++.dg/vect/pr105254.cc b/gcc/testsuite/g++.dg/vect/pr105254.cc new file mode 100644 index 0000000..3481467 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr105254.cc @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fvect-cost-model=dynamic -mcpu=zeus" { target aarch64*-*-* } } */ + +template <typename T> +struct complex; + +template <> +struct complex<double> { + void operator+= (complex r) { v_ += r.v_; } + _Complex v_; +}; + +template <typename T> +void +bar (T); + +void +foo (complex<double> *x) +{ + complex<double> s = {0.0}; + + for (int i = 0; i < 16; ++i) + s += x[i]; + + bar<complex<double> > (s); +} -- 2.7.4