* cfgloop.h (struct loop): Add simdlen member.
* cfgloopmanip.c (copy_loop_info): Copy simdlen as well.
* omp-expand.c (expand_omp_simd): Set it if simdlen clause is present.
* tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0
as new argument to autovectorize_vector_sizes target hook. If
loop->simdlen, pick up vector size where the vectorization factor
is equal to loop->simd, and if there is none, fall back to the first
successful one.
(vect_transform_loop): Adjust autovectorize_vector_sizes target hook
caller.
* omp-low.c (omp_clause_aligned_alignment): Likewise.
* omp-general.c (omp_max_vf): Likewise.
* optabs-query.c (can_vec_mask_load_store_p): Likewise.
* tree-vect-slp.c (vect_slp_bb): Likewise.
* target.def (autovectorize_vector_sizes): Add ALL argument and
document it.
* doc/tm.texi: Adjust documentation.
* targhooks.c (default_autovectorize_vector_sizes): Add bool argument.
* targhooks.h (default_autovectorize_vector_sizes): Likewise.
* config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add
bool argument.
* config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
* config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
* config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
* config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise. If
true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if
preferred vector size is not 512-bit or 256-bit, just put those
unpreferred ones last.
* gcc.target/i386/avx512f-simd-1.c: New test.
From-SVN: r271403
+2019-05-20 Jakub Jelinek <jakub@redhat.com>
+
+ * cfgloop.h (struct loop): Add simdlen member.
+ * cfgloopmanip.c (copy_loop_info): Copy simdlen as well.
+ * omp-expand.c (expand_omp_simd): Set it if simdlen clause is present.
+ * tree-vect-loop.c (vect_analyze_loop): Pass loop->simdlen != 0
+ as new argument to autovectorize_vector_sizes target hook. If
+ loop->simdlen, pick up vector size where the vectorization factor
+ is equal to loop->simd, and if there is none, fall back to the first
+ successful one.
+ (vect_transform_loop): Adjust autovectorize_vector_sizes target hook
+ caller.
+ * omp-low.c (omp_clause_aligned_alignment): Likewise.
+ * omp-general.c (omp_max_vf): Likewise.
+ * optabs-query.c (can_vec_mask_load_store_p): Likewise.
+ * tree-vect-slp.c (vect_slp_bb): Likewise.
+ * target.def (autovectorize_vector_sizes): Add ALL argument and
+ document it.
+ * doc/tm.texi: Adjust documentation.
+ * targhooks.c (default_autovectorize_vector_sizes): Add bool argument.
+ * targhooks.h (default_autovectorize_vector_sizes): Likewise.
+ * config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes): Add
+ bool argument.
+ * config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
+ * config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
+ * config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
+ * config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise. If
+ true and TARGET_AVX512F or TARGET_AVX, push 3 or 2 sizes even if
+ preferred vector size is not 512-bit or 256-bit, just put those
+ unpreferred ones last.
+
2019-05-20 Martin Liska <mliska@suse.cz>
* targhooks.c (default_libc_has_fast_function): New function.
of the loop can be safely evaluated concurrently. */
int safelen;
+ /* Preferred vectorization factor for the loop if non-zero. */
+ int simdlen;
+
/* Constraints are generally set by consumers and affect certain
semantics of niter analyzer APIs. Currently the APIs affected are
number_of_iterations_exit* functions and their callers. One typical
target->nb_iterations_estimate = loop->nb_iterations_estimate;
target->estimate_state = loop->estimate_state;
target->safelen = loop->safelen;
+ target->simdlen = loop->simdlen;
target->constraints = loop->constraints;
target->can_be_parallel = loop->can_be_parallel;
target->warned_aggressive_loop_optimizations
/* Return a list of possible vector sizes for the vectorizer
to iterate over. */
static void
-aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
+aarch64_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (TARGET_SVE)
sizes->safe_push (BYTES_PER_SVE_VECTOR);
TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
static void
-arc_autovectorize_vector_sizes (vector_sizes *sizes)
+arc_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (TARGET_PLUS_QMACW)
{
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
-static void arm_autovectorize_vector_sizes (vector_sizes *);
+static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
}
static void
-arm_autovectorize_vector_sizes (vector_sizes *sizes)
+arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (!TARGET_NEON_VECTORIZE_DOUBLE)
{
256bit and 128bit vectors. */
static void
-ix86_autovectorize_vector_sizes (vector_sizes *sizes)
+ix86_autovectorize_vector_sizes (vector_sizes *sizes, bool all)
{
if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
{
sizes->safe_push (32);
sizes->safe_push (16);
}
+ else if (TARGET_AVX512F && all)
+ {
+ sizes->safe_push (32);
+ sizes->safe_push (16);
+ sizes->safe_push (64);
+ }
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
{
sizes->safe_push (32);
sizes->safe_push (16);
}
+ else if (TARGET_AVX && all)
+ {
+ sizes->safe_push (16);
+ sizes->safe_push (32);
+ }
}
/* Implemenation of targetm.vectorize.get_mask_mode. */
/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
static void
-mips_autovectorize_vector_sizes (vector_sizes *sizes)
+mips_autovectorize_vector_sizes (vector_sizes *sizes, bool)
{
if (ISA_HAS_MSA)
sizes->safe_push (16);
reached. The default is @var{mode} which means no splitting.
@end deftypefn
-@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes})
+@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes}, bool @var{all})
If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not
the only one that is worth considering, this hook should add all suitable
vector sizes to @var{sizes}, in order of decreasing preference. The first
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
+If @var{all} is true, add suitable vector sizes even when they are generally
+not expected to be worthwhile.
The hook does not need to do anything if the vector returned by
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant
&& loop->safelen > 1)
{
loop->force_vectorize = true;
+ if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
+ {
+ unsigned HOST_WIDE_INT v
+ = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
+ if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
+ loop->simdlen = v;
+ }
cfun->has_force_vectorize_loops = true;
}
else if (dont_vectorize)
return 1;
auto_vector_sizes sizes;
- targetm.vectorize.autovectorize_vector_sizes (&sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
if (!sizes.is_empty ())
{
poly_uint64 vf = 0;
unsigned int al = 1;
opt_scalar_mode mode_iter;
auto_vector_sizes sizes;
- targetm.vectorize.autovectorize_vector_sizes (&sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&sizes, true);
poly_uint64 vs = 0;
for (unsigned int i = 0; i < sizes.length (); ++i)
vs = ordered_max (vs, sizes[i]);
return true;
auto_vector_sizes vector_sizes;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, true);
for (unsigned int i = 0; i < vector_sizes.length (); ++i)
{
poly_uint64 cur = vector_sizes[i];
the only one that is worth considering, this hook should add all suitable\n\
vector sizes to @var{sizes}, in order of decreasing preference. The first\n\
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\
+If @var{all} is true, add suitable vector sizes even when they are generally\n\
+not expected to be worthwhile.\n\
\n\
The hook does not need to do anything if the vector returned by\n\
@code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\
for autovectorization. The default implementation does nothing.",
void,
- (vector_sizes *sizes),
+ (vector_sizes *sizes, bool all),
default_autovectorize_vector_sizes)
/* Function to get a target mode for a vector mask. */
is tried. */
void
-default_autovectorize_vector_sizes (vector_sizes *)
+default_autovectorize_vector_sizes (vector_sizes *, bool)
{
}
int, bool);
extern machine_mode default_preferred_simd_mode (scalar_mode mode);
extern machine_mode default_split_reduction (machine_mode);
-extern void default_autovectorize_vector_sizes (vector_sizes *);
+extern void default_autovectorize_vector_sizes (vector_sizes *, bool);
extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64);
extern bool default_empty_mask_is_expensive (unsigned);
extern void *default_init_cost (struct loop *);
+2019-05-20 Jakub Jelinek <jakub@redhat.com>
+
+ * gcc.target/i386/avx512f-simd-1.c: New test.
+
2019-05-20 Christophe Lyon <christophe.lyon@linaro.org>
* gcc.target/aarch64/target_attr_10.c: Add quotes to expected
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */
+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */
+
+#define N 1024
+int a[N];
+
+void
+f1 (void)
+{
+ int i;
+ #pragma omp simd simdlen (4)
+ for (i = 0; i < N; ++i)
+ a[i] = a[i] + 1;
+}
+
+void
+f2 (void)
+{
+ int i;
+ #pragma omp simd simdlen (8)
+ for (i = 0; i < N; ++i)
+ a[i] = a[i] + 2;
+}
+
+void
+f3 (void)
+{
+ int i;
+ #pragma omp simd simdlen (16)
+ for (i = 0; i < N; ++i)
+ a[i] = a[i] + 3;
+}
/* Autodetect first vector size we try. */
current_vector_size = 0;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes,
+ loop->simdlen != 0);
unsigned int next_size = 0;
DUMP_VECT_SCOPE ("analyze_loop_nest");
unsigned n_stmts = 0;
poly_uint64 autodetected_vector_size = 0;
+ opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
+ poly_uint64 first_vector_size = 0;
while (1)
{
/* Check the CFG characteristics of the loop (nesting, entry/exit). */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"bad loop form.\n");
+ gcc_checking_assert (first_loop_vinfo == NULL);
return loop_vinfo;
}
{
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
- return loop_vinfo;
+ if (loop->simdlen
+ && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ (unsigned HOST_WIDE_INT) loop->simdlen))
+ {
+ if (first_loop_vinfo == NULL)
+ {
+ first_loop_vinfo = loop_vinfo;
+ first_vector_size = current_vector_size;
+ loop->aux = NULL;
+ }
+ else
+ delete loop_vinfo;
+ }
+ else
+ {
+ delete first_loop_vinfo;
+ return loop_vinfo;
+ }
}
-
- delete loop_vinfo;
+ else
+ delete loop_vinfo;
if (next_size == 0)
autodetected_vector_size = current_vector_size;
&& known_eq (vector_sizes[next_size], autodetected_vector_size))
next_size += 1;
- if (fatal
- || next_size == vector_sizes.length ()
+ if (fatal)
+ {
+ gcc_checking_assert (first_loop_vinfo == NULL);
+ return opt_loop_vec_info::propagate_failure (res);
+ }
+
+ if (next_size == vector_sizes.length ()
|| known_eq (current_vector_size, 0U))
- return opt_loop_vec_info::propagate_failure (res);
+ {
+ if (first_loop_vinfo)
+ {
+ current_vector_size = first_vector_size;
+ loop->aux = (loop_vec_info) first_loop_vinfo;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Choosing vector size ");
+ dump_dec (MSG_NOTE, current_vector_size);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ return first_loop_vinfo;
+ }
+ else
+ return opt_loop_vec_info::propagate_failure (res);
+ }
/* Try the next biggest vector size. */
current_vector_size = vector_sizes[next_size++];
if (epilogue)
{
auto_vector_sizes vector_sizes;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
unsigned int next_size = 0;
/* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
/* Autodetect first vector size we try. */
current_vector_size = 0;
- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
unsigned int next_size = 0;
gsi = gsi_start_bb (bb);