2019-11-13  Richard Sandiford  <richard.sandiford@arm.com>
+ * tree-vect-loop.c (vect_estimate_min_profitable_iters): Include
+ the cost of generating loop masks.
+
+2019-11-13  Richard Sandiford  <richard.sandiford@arm.com>
+
* tree-vectorizer.h (vect_apply_runtime_profitability_check_p):
New function.
* tree-vect-loop-manip.c (vect_loop_versioning): Use it.
2019-11-13  Richard Sandiford  <richard.sandiford@arm.com>
+ * gcc.target/aarch64/sve/mask_struct_store_3.c: Add
+ -fno-vect-cost-model.
+ * gcc.target/aarch64/sve/mask_struct_store_3_run.c: Likewise.
+ * gcc.target/aarch64/sve/peel_ind_2.c: Likewise.
+ * gcc.target/aarch64/sve/peel_ind_2_run.c: Likewise.
+ * gcc.target/aarch64/sve/peel_ind_3.c: Likewise.
+ * gcc.target/aarch64/sve/peel_ind_3_run.c: Likewise.
+
+2019-11-13  Richard Sandiford  <richard.sandiford@arm.com>
+
PR c++/92206
* g++.dg/cpp0x/alias-decl-pr92206-1.C: New test.
* g++.dg/cpp0x/alias-decl-pr92206-2.C: Likewise.
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -fno-vect-cost-model" } */
#include <stdint.h>
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -fno-vect-cost-model" } */
#include "mask_struct_store_3.c"
/* { dg-do compile } */
/* Pick an arbitrary target for which unaligned accesses are more
expensive. */
-/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx" } */
+/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" } */
#define N 512
#define START 7
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O3 -mtune=thunderx" } */
-/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256 -fno-vect-cost-model" { target aarch64_sve256_hw } } */
#include "peel_ind_2.c"
/* { dg-do compile } */
/* Pick an arbitrary target for which unaligned accesses are more
expensive. */
-/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx" } */
+/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" } */
#define N 32
#define MAX_START 8
/* { dg-do run { target aarch64_sve_hw } } */
-/* { dg-options "-O3 -mtune=thunderx" } */
-/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-options "-O3 -mtune=thunderx -fno-vect-cost-model" } */
+/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256 -fno-vect-cost-model" { target aarch64_sve256_hw } } */
#include "peel_ind_3.c"
si->kind, si->stmt_info, si->misalign,
vect_epilogue);
}
+
+ /* Calculate how many masks we need to generate. */
+ unsigned int num_masks = 0;
+ rgroup_masks *rgm;
+ unsigned int num_vectors_m1;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
+ if (rgm->mask_type)
+ num_masks += num_vectors_m1 + 1;
+ gcc_assert (num_masks > 0);
+
+ /* In the worst case, we need to generate each mask in the prologue
+ and in the loop body. One of the loop body mask instructions
+ replaces the comparison in the scalar loop, and since we don't
+ count the scalar comparison against the scalar body, we shouldn't
+ count that vector instruction against the vector body either.
+
+ Sometimes we can use unpacks instead of generating prologue
+ masks and sometimes the prologue mask will fold to a constant,
+ so the actual prologue cost might be smaller. However, it's
+ simpler and safer to use the worst-case cost; if this ends up
+ being the tie-breaker between vectorizing or not, then it's
+ probably better not to vectorize. */
+ (void) add_stmt_cost (target_cost_data, num_masks, vector_stmt,
+ NULL, 0, vect_prologue);
+ (void) add_stmt_cost (target_cost_data, num_masks - 1, vector_stmt,
+ NULL, 0, vect_body);
}
else if (npeel < 0)
{