From 3ba4ff4130903a3ded931d715a2204bd8834fe60 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 9 May 2018 10:35:31 +0000 Subject: [PATCH] Add clobbers around IFN_LOAD/STORE_LANES We build up the input to IFN_STORE_LANES one vector at a time. In RTL, each of these vector assignments becomes a write to subregs of the form (subreg:VEC (reg:AGGR R)), where R is the eventual input to the store lanes instruction. The problem is that RTL isn't very good at tracking liveness when things are initialised piecemeal by subregs, so R tends to end up being live on all paths from the entry block to the store. This in turn leads to unnecessary spilling around calls, as well as to excess register pressure in vector loops. This patch adds gimple clobbers to indicate the liveness of the IFN_STORE_LANES variable and makes sure that gimple clobbers are expanded to rtl clobbers where useful. For consistency it also uses clobbers to mark the point at which an IFN_LOAD_LANES variable is no longer needed. 2018-05-08 Richard Sandiford gcc/ * cfgexpand.c (expand_clobber): New function. (expand_gimple_stmt_1): Use it. * tree-vect-stmts.c (vect_clobber_variable): New function, split out from... (vectorizable_simd_clone_call): ...here. (vectorizable_store): Emit a clobber either side of an IFN_STORE_LANES sequence. (vectorizable_load): Emit a clobber after an IFN_LOAD_LANES sequence. gcc/testsuite/ * gcc.target/aarch64/store_lane_spill_1.c: New test. * gcc.target/aarch64/sve/store_lane_spill_1.c: Likewise. 
From-SVN: r260073 --- gcc/ChangeLog | 11 ++++++ gcc/cfgexpand.c | 22 ++++++++++- gcc/testsuite/ChangeLog | 5 +++ .../gcc.target/aarch64/store_lane_spill_1.c | 21 +++++++++++ .../gcc.target/aarch64/sve/store_lane_spill_1.c | 19 ++++++++++ gcc/tree-vect-stmts.c | 44 ++++++++++++++-------- 6 files changed, 105 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/store_lane_spill_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/store_lane_spill_1.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 654328f..83d01ed 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2018-05-09 Richard Sandiford + + * cfgexpand.c (expand_clobber): New function. + (expand_gimple_stmt_1): Use it. + * tree-vect-stmts.c (vect_clobber_variable): New function, + split out from... + (vectorizable_simd_clone_call): ...here. + (vectorizable_store): Emit a clobber either side of an + IFN_STORE_LANES sequence. + (vectorizable_load): Emit a clobber after an IFN_LOAD_LANES sequence. + 2018-05-09 Tom de Vries PR target/85626 diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index ff2f407..ef143a3 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -3582,6 +3582,26 @@ expand_return (tree retval, tree bounds) } } +/* Expand a clobber of LHS. If LHS is stored in a multi-part + register, tell the rtl optimizers that its value is no longer + needed. */ + +static void +expand_clobber (tree lhs) +{ + if (DECL_P (lhs)) + { + rtx decl_rtl = DECL_RTL_IF_SET (lhs); + if (decl_rtl && REG_P (decl_rtl)) + { + machine_mode decl_mode = GET_MODE (decl_rtl); + if (maybe_gt (GET_MODE_SIZE (decl_mode), + REGMODE_NATURAL_SIZE (decl_mode))) + emit_clobber (decl_rtl); + } + } +} + /* A subroutine of expand_gimple_stmt, expanding one gimple statement STMT that doesn't require special handling for outgoing edges. That is no tailcalls and no GIMPLE_COND. 
*/ @@ -3687,7 +3707,7 @@ expand_gimple_stmt_1 (gimple *stmt) if (TREE_CLOBBER_P (rhs)) /* This is a clobber to mark the going out of scope for this LHS. */ - ; + expand_clobber (lhs); else expand_assignment (lhs, rhs, gimple_assign_nontemporal_move_p ( diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9555222..85c2255 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-05-09 Richard Sandiford + + * gcc.target/aarch64/store_lane_spill_1.c: New test. + * gcc.target/aarch64/sve/store_lane_spill_1.c: Likewise. + 2018-05-08 Carl Love * gcc.target/powerpc/builtins-8-p9-runnable.c: Add new test file. diff --git a/gcc/testsuite/gcc.target/aarch64/store_lane_spill_1.c b/gcc/testsuite/gcc.target/aarch64/store_lane_spill_1.c new file mode 100644 index 0000000..80683c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/store_lane_spill_1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#pragma GCC target "+nosve" + +int cont (void); + +void +f (int (*x)[3], int *a, int *b, int *c, int n) +{ + do + for (int i = 0; i < n; ++i) + { + x[i][0] = a[i] + 1; + x[i][1] = b[i] + 2; + x[i][2] = c[i] + 3; + } + while (cont ()); +} + +/* { dg-final { scan-assembler-not {\tst1\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/store_lane_spill_1.c b/gcc/testsuite/gcc.target/aarch64/sve/store_lane_spill_1.c new file mode 100644 index 0000000..7e17f62 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/store_lane_spill_1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +int cont (void); + +void +f (int (*x)[3], int *a, int *b, int *c, int n) +{ + do + for (int i = 0; i < n; ++i) + { + x[i][0] = a[i] + 1; + x[i][1] = b[i] + 2; + x[i][2] = c[i] + 3; + } + while (cont ()); +} + +/* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 7ebc426..1e8ccbc 100644 --- 
a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -182,6 +182,17 @@ create_array_ref (tree type, tree ptr, tree alias_ptr_type) return mem_ref; } +/* Add a clobber of variable VAR to the vectorization of STMT. + Emit the clobber before *GSI. */ + +static void +vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var) +{ + tree clobber = build_clobber (TREE_TYPE (var)); + gimple *new_stmt = gimple_build_assign (var, clobber); + vect_finish_stmt_generation (stmt, new_stmt, gsi); +} + /* Utility functions used by vect_mark_stmts_to_be_vectorized. */ /* Function vect_mark_relevant. @@ -4128,12 +4139,7 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi, } if (ratype) - { - tree clobber = build_constructor (ratype, NULL); - TREE_THIS_VOLATILE (clobber) = 1; - new_stmt = gimple_build_assign (new_temp, clobber); - vect_finish_stmt_generation (stmt, new_stmt, gsi); - } + vect_clobber_variable (stmt, gsi, new_temp); continue; } else if (simd_clone_subparts (vectype) > nunits) @@ -4156,10 +4162,7 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi, CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, gimple_assign_lhs (new_stmt)); } - tree clobber = build_constructor (ratype, NULL); - TREE_THIS_VOLATILE (clobber) = 1; - new_stmt = gimple_build_assign (new_temp, clobber); - vect_finish_stmt_generation (stmt, new_stmt, gsi); + vect_clobber_variable (stmt, gsi, new_temp); } else CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); @@ -4186,11 +4189,7 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi, new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t); vect_finish_stmt_generation (stmt, new_stmt, gsi); - tree clobber = build_constructor (ratype, NULL); - TREE_THIS_VOLATILE (clobber) = 1; - vect_finish_stmt_generation (stmt, - gimple_build_assign (new_temp, - clobber), gsi); + vect_clobber_variable (stmt, gsi, new_temp); } } @@ -6913,8 +6912,15 @@ vectorizable_store (gimple *stmt, 
gimple_stmt_iterator *gsi, gimple **vec_stmt, { tree vec_array; - /* Combine all the vectors into an array. */ + /* Get an array into which we can store the individual vectors. */ vec_array = create_vector_array (vectype, vec_num); + + /* Invalidate the current contents of VEC_ARRAY. This should + become an RTL clobber too, which prevents the vector registers + from being upward-exposed. */ + vect_clobber_variable (stmt, gsi, vec_array); + + /* Store the individual vectors into the array. */ for (i = 0; i < vec_num; i++) { vec_oprnd = dr_chain[i]; @@ -6953,6 +6959,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, gimple_call_set_nothrow (call, true); new_stmt = call; vect_finish_stmt_generation (stmt, new_stmt, gsi); + + /* Record that VEC_ARRAY is now dead. */ + vect_clobber_variable (stmt, gsi, vec_array); } else { @@ -8105,6 +8114,9 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, /* Record the mapping between SSA_NAMEs and statements. */ vect_record_grouped_load_vectors (stmt, dr_chain); + + /* Record that VEC_ARRAY is now dead. */ + vect_clobber_variable (stmt, gsi, vec_array); } else { -- 2.7.4