}
}
+/* Return true if an operation of kind KIND for STMT_INFO represents
+ the extraction of an element from a vector in preparation for
+ storing the element to memory. */
+static bool
+aarch64_is_store_elt_extraction (vect_cost_for_stmt kind,
+ stmt_vec_info stmt_info)
+{
+ return (kind == vec_to_scalar
+ && STMT_VINFO_DATA_REF (stmt_info)
+ && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)));
+}
+
/* Return true if STMT_INFO represents part of a reduction. */
static bool
aarch64_is_reduction (stmt_vec_info stmt_info)
/* Detect cases in which vec_to_scalar is describing the extraction of a
vector element in preparation for a scalar store. The store itself is
costed separately. */
- if (kind == vec_to_scalar
- && STMT_VINFO_DATA_REF (stmt_info)
- && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
+ if (aarch64_is_store_elt_extraction (kind, stmt_info))
return simd_costs->store_elt_extra_cost;
/* Detect SVE gather loads, which are costed as a single scalar_load
if (vectype && aarch64_sve_only_stmt_p (stmt_info, vectype))
costs->saw_sve_only_op = true;
+ /* If we scalarize a strided store, the vectorizer costs one
+ vec_to_scalar for each element. However, we can store the first
+ element using an FP store without a separate extract step. */
+ if (aarch64_is_store_elt_extraction (kind, stmt_info))
+ count -= 1;
+
stmt_cost = aarch64_detect_scalar_stmt_subtype
(vinfo, kind, stmt_info, stmt_cost);