From 599bd99078439b9f11cb271aa919844318381ec5 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 11 Nov 2019 19:43:52 +0000 Subject: [PATCH] Fix SLP downward group access classification (PR92420) This PR was caused by the SLP handling in get_group_load_store_type returning VMAT_CONTIGUOUS rather than VMAT_CONTIGUOUS_REVERSE for downward groups. A more elaborate fix would be to try to combine the reverse permutation into SLP_TREE_LOAD_PERMUTATION for loads, but that's really a follow-on optimisation and not backport material. It might also not necessarily be a win, if the target supports (say) reversing and odd/even swaps as independent permutes but doesn't recognise the combined form. 2019-11-11 Richard Sandiford gcc/ PR tree-optimization/92420 * tree-vect-stmts.c (get_negative_load_store_type): Move further up file. (get_group_load_store_type): Use it for reversed SLP accesses. gcc/testsuite/ * gcc.dg/vect/pr92420.c: New test. From-SVN: r278064 --- gcc/ChangeLog | 7 +++ gcc/testsuite/ChangeLog | 4 ++ gcc/testsuite/gcc.dg/vect/pr92420.c | 48 ++++++++++++++++ gcc/tree-vect-stmts.c | 110 +++++++++++++++++++----------------- 4 files changed, 118 insertions(+), 51 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr92420.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 72d1e3d..329d107 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2019-11-11 Richard Sandiford + + PR tree-optimization/92420 + * tree-vect-stmts.c (get_negative_load_store_type): Move further + up file. + (get_group_load_store_type): Use it for reversed SLP accesses. + 2019-11-11 Jan Hubicka * ipa-prop.c (ipa_propagate_indirect_call_infos): Remove ipcp diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e734954..88088fa 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2019-11-11 Richard Sandiford + + * gcc.dg/vect/pr92420.c: New test. + 2019-11-11 Claudiu Zissulescu * gcc.target/arc/delay-slot-limm.c: New test. diff --git a/gcc/testsuite/gcc.dg/vect/pr92420.c b/gcc/testsuite/gcc.dg/vect/pr92420.c new file mode 100644 index 0000000..e43539f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr92420.c @@ -0,0 +1,48 @@ +/* { dg-additional-options "-mavx2" { target avx_runtime } } */ + +#include "tree-vect.h" + +#define N 16 +struct C { int r, i; }; +struct C a[N], b[N], c[N], d[N], e[N]; + +__attribute__((noipa)) static void +foo (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w) +{ + int i; + for (int i = 0; i < w; i++) + { + z[i].r = x[i].r * y[-1 - i].r - x[i].i * y[-1 - i].i; + z[i].i = x[i].i * y[-1 - i].r + x[i].r * y[-1 - i].i; + } +} + +__attribute__((noipa)) static void +bar (struct C *__restrict x, struct C *__restrict y, struct C *__restrict z, int w) +{ + int i; + for (int i = 0; i < w; i++) + { + z[i].r = x[i].r * y[i].r - x[i].i * y[i].i; + z[i].i = x[i].i * y[i].r + x[i].r * y[i].i; + } +} + +int +main () +{ + check_vect (); + int i; + for (i = 0; i < N; ++i) + { + a[i].r = N - i; a[i].i = i - N; + b[i].r = i - N; b[i].i = i + N; + c[i].r = -1 - i; c[i].i = 2 * N - 1 - i; + } + foo (a, b + N, d, N); + bar (a, c, e, N); + for (i = 0; i < N; ++i) + if (d[i].r != e[i].r || d[i].i != e[i].i) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index eaca014..1da949a 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2147,6 +2147,56 @@ perm_mask_for_reverse (tree vectype) return vect_gen_perm_mask_checked (vectype, indices); } +/* A subroutine of get_load_store_type, with a subset of the same + arguments. Handle the case where STMT_INFO is a load or store that + accesses consecutive elements with a negative step. */ + +static vect_memory_access_type +get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype, + vec_load_store_type vls_type, + unsigned int ncopies) +{ + dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); + dr_alignment_support alignment_support_scheme; + + if (ncopies > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "multiple types with negative step.\n"); + return VMAT_ELEMENTWISE; + } + + alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false); + if (alignment_support_scheme != dr_aligned + && alignment_support_scheme != dr_unaligned_supported) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "negative step but alignment required.\n"); + return VMAT_ELEMENTWISE; + } + + if (vls_type == VLS_STORE_INVARIANT) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "negative step with invariant source;" + " no permute needed.\n"); + return VMAT_CONTIGUOUS_DOWN; + } + + if (!perm_mask_for_reverse (vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "negative step and reversing not supported.\n"); + return VMAT_ELEMENTWISE; + } + + return VMAT_CONTIGUOUS_REVERSE; +} + /* STMT_INFO is either a masked or unconditional store. Return the value being stored. */ @@ -2273,7 +2323,15 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, "Peeling for outer loop is not supported\n"); return false; } - *memory_access_type = VMAT_CONTIGUOUS; + int cmp = compare_step_with_zero (stmt_info); + if (cmp < 0) + *memory_access_type = get_negative_load_store_type + (stmt_info, vectype, vls_type, 1); + else + { + gcc_assert (!loop_vinfo || cmp > 0); + *memory_access_type = VMAT_CONTIGUOUS; + } } } else @@ -2375,56 +2433,6 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, return true; } -/* A subroutine of get_load_store_type, with a subset of the same - arguments. Handle the case where STMT_INFO is a load or store that - accesses consecutive elements with a negative step. */ - -static vect_memory_access_type -get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype, - vec_load_store_type vls_type, - unsigned int ncopies) -{ - dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); - dr_alignment_support alignment_support_scheme; - - if (ncopies > 1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "multiple types with negative step.\n"); - return VMAT_ELEMENTWISE; - } - - alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false); - if (alignment_support_scheme != dr_aligned - && alignment_support_scheme != dr_unaligned_supported) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "negative step but alignment required.\n"); - return VMAT_ELEMENTWISE; - } - - if (vls_type == VLS_STORE_INVARIANT) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "negative step with invariant source;" - " no permute needed.\n"); - return VMAT_CONTIGUOUS_DOWN; - } - - if (!perm_mask_for_reverse (vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "negative step and reversing not supported.\n"); - return VMAT_ELEMENTWISE; - } - - return VMAT_CONTIGUOUS_REVERSE; -} - /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true if there is a memory access type that the vectorized form can use, storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers -- 2.7.4