From c1bee668987f371617586edafa0dfccf04162a83 Mon Sep 17 00:00:00 2001 From: rguenth Date: Thu, 18 Jun 2015 09:39:13 +0000 Subject: [PATCH] 2015-06-18 Richard Biener PR tree-optimization/66510 * tree-vect-stmts.c (vectorizable_load): Properly compute the number of vector loads for SLP permuted loads. * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Also check the stride for loop vectorization. (vect_enhance_data_refs_alignment): Deal with SLP adjusted vectorization factor. (vect_analyze_group_access): If the group size is not a power of two require an epilogue loop. * tree-vect-loop.c (vect_analyze_loop_2): Move alignment compute and optimizing and alias test pruning after final vectorization factor computation. * tree-vect-slp.c (vect_build_slp_tree_1): Remove check on vector alignment. (vect_transform_slp_perm_load): Properly compute the original number of vector load stmts. * gcc.dg/vect/slp-perm-12.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@224598 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 19 ++++++++++++ gcc/testsuite/ChangeLog | 5 ++++ gcc/testsuite/gcc.dg/vect/slp-perm-12.c | 52 +++++++++++++++++++++++++++++++++ gcc/tree-vect-data-refs.c | 35 ++++++++++++++-------- gcc/tree-vect-loop.c | 48 +++++++++++++++--------------- gcc/tree-vect-slp.c | 44 +++++++--------------------- gcc/tree-vect-stmts.c | 8 ++++- 7 files changed, 141 insertions(+), 70 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/slp-perm-12.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 17e1c31..22b3325 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2015-06-18 Richard Biener + + PR tree-optimization/66510 + * tree-vect-stmts.c (vectorizable_load): Properly compute the + number of vector loads for SLP permuted loads. + * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Also + check the stride for loop vectorization. + (vect_enhance_data_refs_alignment): Deal with SLP adjusted + vectorization factor. 
+ (vect_analyze_group_access): If the group size is not a power + of two require an epilogue loop. + * tree-vect-loop.c (vect_analyze_loop_2): Move alignment + compute and optimizing and alias test pruning after final + vectorization factor computation. + * tree-vect-slp.c (vect_build_slp_tree_1): Remove check on + vector alignment. + (vect_transform_slp_perm_load): Properly compute the original + number of vector load stmts. + 2015-06-18 Uros Bizjak * doc/invoke.texi (-fsanitize-sections): Split @var to avoid diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bdf4bd2..80727d1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-06-18 Richard Biener + + PR tree-optimization/66510 + * gcc.dg/vect/slp-perm-12.c: New testcase. + 2015-06-17 Uros Bizjak * gcc.target/i386/noplt-1.c (dg-do): Fix target selector. diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-12.c b/gcc/testsuite/gcc.dg/vect/slp-perm-12.c new file mode 100644 index 0000000..4d4c534 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-12.c @@ -0,0 +1,52 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_pack_trunc } */ +/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */ + +#include "tree-vect.h" + +extern void abort (void); + +unsigned char a[64]; +short b[88]; + +void __attribute__((noinline)) +test(unsigned char * __restrict__ dst, short * __restrict__ tptr) +{ + int i; + for (i = 0; i < 8; i++) + { + dst[0] = (tptr[0] - tptr[0 + 3]); + dst[1] = (tptr[1] - tptr[1 + 3]); + dst[2] = (tptr[2] - tptr[2 + 3]); + dst[3] = (tptr[3] - tptr[3 + 3]); + dst[4] = (tptr[4] - tptr[4 + 3]); + dst[5] = (tptr[5] - tptr[5 + 3]); + dst[6] = (tptr[6] - tptr[6 + 3]); + dst[7] = (tptr[7] - tptr[7 + 3]); + dst += 8; + tptr += 11; + } +} + +int main() +{ + int i; + + check_vect (); + + for (i = 0; i < 88; ++i) + { + b[i] = i; + __asm__ volatile (""); + } + + test (a, b); + + for (i = 0; i < 64; ++i) + if (a[i] 
!= 253) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 3fc1226..b626e38 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -691,21 +691,22 @@ vect_compute_data_ref_alignment (struct data_reference *dr) } } - /* Similarly, if we're doing basic-block vectorization, we can only use - base and misalignment information relative to an innermost loop if the - misalignment stays the same throughout the execution of the loop. - As above, this is the case if the stride of the dataref evenly divides - by the vector size. */ - if (!loop) + /* Similarly we can only use base and misalignment information relative to + an innermost loop if the misalignment stays the same throughout the + execution of the loop. As above, this is the case if the stride of + the dataref evenly divides by the vector size. */ + else { tree step = DR_STEP (dr); + unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1; if (tree_fits_shwi_p (step) - && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0) + && ((tree_to_shwi (step) * vf) + % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "SLP: step doesn't divide the vector-size.\n"); + "step doesn't divide the vector-size.\n"); misalign = NULL_TREE; } } @@ -1440,7 +1441,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) We do this automtically for cost model, since we calculate cost for every peeling option. */ if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) - possible_npeel_number = vf /nelements; + { + if (STMT_SLP_TYPE (stmt_info)) + possible_npeel_number + = (vf * GROUP_SIZE (stmt_info)) / nelements; + else + possible_npeel_number = vf / nelements; + } /* Handle the aligned case. We may decide to align some other access, making DR unaligned. 
*/ @@ -1453,7 +1460,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) for (j = 0; j < possible_npeel_number; j++) { - gcc_assert (npeel_tmp <= vf); vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp); npeel_tmp += nelements; } @@ -2230,8 +2236,13 @@ vect_analyze_group_access (struct data_reference *dr) BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt); } - /* There is a gap in the end of the group. */ - if (groupsize - last_accessed_element > 0 && loop_vinfo) + /* If there is a gap in the end of the group or the group size cannot + be made a multiple of the vector element count then we access excess + elements in the last iteration and thus need to peel that off. */ + if (loop_vinfo + && (groupsize - last_accessed_element > 0 + || exact_log2 (groupsize) == -1)) + { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 584a32cb..4b01ade 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1791,6 +1791,22 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) return false; } + /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ + ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts); + if (!ok) + return false; + + /* If there are any SLP instances mark them as pure_slp. */ + bool slp = vect_make_slp_decision (loop_vinfo); + if (slp) + { + /* Find stmts that need to be both vectorized and SLPed. */ + vect_detect_hybrid_slp (loop_vinfo); + + /* Update the vectorization factor based on the SLP decision. */ + vect_update_vf_for_slp (loop_vinfo); + } + /* Analyze the alignment of the data-refs in the loop. Fail if a data reference is found that cannot be vectorized. */ @@ -1830,31 +1846,17 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) return false; } - /* Check the SLP opportunities in the loop, analyze and build SLP trees. 
*/ - ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts); - if (ok) + if (slp) { - /* If there are any SLP instances mark them as pure_slp. */ - if (vect_make_slp_decision (loop_vinfo)) - { - /* Find stmts that need to be both vectorized and SLPed. */ - vect_detect_hybrid_slp (loop_vinfo); - - /* Update the vectorization factor based on the SLP decision. */ - vect_update_vf_for_slp (loop_vinfo); - - /* Analyze operations in the SLP instances. Note this may - remove unsupported SLP instances which makes the above - SLP kind detection invalid. */ - unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); - vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), - LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); - if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) - return false; - } + /* Analyze operations in the SLP instances. Note this may + remove unsupported SLP instances which makes the above + SLP kind detection invalid. */ + unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); + vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), + LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); + if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) + return false; } - else - return false; /* Scan all the remaining operations in the loop that are not subject to SLP and make sure they are vectorizable. */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index e44396a..47d8a42 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -485,9 +485,8 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, int icode; machine_mode optab_op2_mode; machine_mode vec_mode; - struct data_reference *first_dr; HOST_WIDE_INT dummy; - gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL; + gimple first_load = NULL, prev_first_load = NULL; tree cond; /* For every stmt in NODE find its def stmt/s. 
*/ @@ -785,7 +784,6 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, return false; } - old_first_load = first_load; first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)); if (prev_first_load) { @@ -809,30 +807,6 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, } else prev_first_load = first_load; - - /* In some cases a group of loads is just the same load - repeated N times. Only analyze its cost once. */ - if (first_load == stmt && old_first_load != first_load) - { - first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); - if (vect_supportable_dr_alignment (first_dr, false) - == dr_unaligned_unsupported) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, - vect_location, - "Build SLP failed: unsupported " - "unaligned load "); - dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, - stmt, 0); - dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); - } - /* Fatal mismatch. */ - matches[0] = false; - return false; - } - } } } /* Grouped access. */ else @@ -3201,6 +3175,11 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, bool needs_first_vector = false; machine_mode mode; + if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) + return false; + + stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)); + mode = TYPE_MODE (vectype); if (!can_vec_perm_p (mode, false, NULL)) @@ -3226,8 +3205,10 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE unrolling factor. */ - orig_vec_stmts_num = group_size * - SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits; + orig_vec_stmts_num + = (STMT_VINFO_GROUP_SIZE (stmt_info) + * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) + + nunits - 1) / nunits; if (orig_vec_stmts_num == 1) only_one_vec = true; @@ -3235,11 +3216,6 @@ vect_transform_slp_perm_load (slp_tree node, vec dr_chain, relatively to SLP_NODE_INSTANCE unrolling factor. 
*/ ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); - if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) - return false; - - stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)); - /* Generate permutation masks for every NODE. Number of masks for each NODE is equal to GROUP_SIZE. E.g., we have a group of three nodes with three loads from the same diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 7ba0d8f..d4d3b91 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -6422,7 +6422,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, if (slp) { grouped_load = false; - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + /* For SLP permutation support we need to load the whole group, + not only the number of vector stmts the permutation result + fits in. */ + if (slp_perm) + vec_num = (group_size * vf + nunits - 1) / nunits; + else + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); group_gap_adj = vf * group_size - nunits * vec_num; } else -- 2.7.4