From 7c679969bac9b7ae5e9446bfaa5466e19063d690 Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Fri, 26 Mar 2021 16:08:32 +0000
Subject: [PATCH] aarch64: Add costs for one element of a scatter store

Currently each element in a gather load is costed as a scalar_load
and each element in a scatter store is costed as a scalar_store.
The load side seems to work pretty well in practice, since many
CPU-specific costs give loads quite a high cost relative to
arithmetic operations. However, stores usually have a cost
of just 1, which means that scatters tend to appear too cheap.

This patch adds a separate cost for one element in a scatter store.

Like with the previous patches, this one only becomes active if
a CPU selects use_new_vector_costs. It should therefore have
a very low impact on other CPUs.

gcc/
	* config/aarch64/aarch64-protos.h
	(sve_vec_cost::scatter_store_elt_cost): New member variable.
	* config/aarch64/aarch64.c (generic_sve_vector_cost): Update
	accordingly, taking the cost from the cost of a scalar_store.
	(a64fx_sve_vector_cost): Likewise.
	(aarch64_detect_vector_stmt_subtype): Detect scatter stores.
--- gcc/config/aarch64/aarch64-protos.h | 9 +++++++-- gcc/config/aarch64/aarch64.c | 13 +++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index fabe3df..2ffa96e 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -256,12 +256,14 @@ struct sve_vec_cost : simd_vec_cost unsigned int clast_cost, unsigned int fadda_f16_cost, unsigned int fadda_f32_cost, - unsigned int fadda_f64_cost) + unsigned int fadda_f64_cost, + unsigned int scatter_store_elt_cost) : simd_vec_cost (base), clast_cost (clast_cost), fadda_f16_cost (fadda_f16_cost), fadda_f32_cost (fadda_f32_cost), - fadda_f64_cost (fadda_f64_cost) + fadda_f64_cost (fadda_f64_cost), + scatter_store_elt_cost (scatter_store_elt_cost) {} /* The cost of a vector-to-scalar CLASTA or CLASTB instruction, @@ -274,6 +276,9 @@ struct sve_vec_cost : simd_vec_cost const int fadda_f16_cost; const int fadda_f32_cost; const int fadda_f64_cost; + + /* The per-element cost of a scatter store. */ + const int scatter_store_elt_cost; }; /* Cost for vector insn classes. */ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 20bb75b..7f72741 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -638,7 +638,8 @@ static const sve_vec_cost generic_sve_vector_cost = 2, /* clast_cost */ 2, /* fadda_f16_cost */ 2, /* fadda_f32_cost */ - 2 /* fadda_f64_cost */ + 2, /* fadda_f64_cost */ + 1 /* scatter_store_elt_cost */ }; /* Generic costs for vector insn classes. 
*/ @@ -705,7 +706,8 @@ static const sve_vec_cost a64fx_sve_vector_cost = 13, /* clast_cost */ 13, /* fadda_f16_cost */ 13, /* fadda_f32_cost */ - 13 /* fadda_f64_cost */ + 13, /* fadda_f64_cost */ + 1 /* scatter_store_elt_cost */ }; static const struct cpu_vector_cost a64fx_vector_cost = @@ -14279,6 +14281,13 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))) return simd_costs->store_elt_extra_cost; + /* Detect cases in which a scalar_store is really storing one element + in a scatter operation. */ + if (kind == scalar_store + && sve_costs + && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER) + return sve_costs->scatter_store_elt_cost; + /* Detect cases in which vec_to_scalar represents an in-loop reduction. */ if (kind == vec_to_scalar && where == vect_body -- 2.7.4