From 9945596cefaa45d13ecab76b4d97ad021bc3a872 Mon Sep 17 00:00:00 2001 From: Bill Schmidt Date: Fri, 5 May 2017 21:27:54 +0000 Subject: [PATCH] rs6000.c (rs6000_vect_nonmem): New static var. [gcc] 2017-05-05 Bill Schmidt * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var. (rs6000_init_cost): Initialize rs6000_vect_nonmem. (rs6000_add_stmt_cost): Update rs6000_vect_nonmem. (rs6000_finish_cost): Avoid vectorizing simple copy loops with VF=2 that require versioning. [gcc/testsuite] 2017-05-05 Bill Schmidt * gcc.target/powerpc/versioned-copy-loop.c: New file. From-SVN: r247671 --- gcc/ChangeLog | 8 ++++++ gcc/config/rs6000/rs6000.c | 27 +++++++++++++++++++ gcc/testsuite/ChangeLog | 4 +++ .../gcc.target/powerpc/versioned-copy-loop.c | 30 ++++++++++++++++++++++ 4 files changed, 69 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index aeaa27d..a2f57ac 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2017-05-05 Bill Schmidt + + * config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var. + (rs6000_init_cost): Initialize rs6000_vect_nonmem. + (rs6000_add_stmt_cost): Update rs6000_vect_nonmem. + (rs6000_finish_cost): Avoid vectorizing simple copy loops with + VF=2 that require versioning. + 2017-05-05 David Malcolm * diagnostic.h (CARET_LINE_MARGIN): Convert from macro to const diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8f68d84..bac56ab 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5873,6 +5873,10 @@ rs6000_density_test (rs6000_cost_data *data) /* Implement targetm.vectorize.init_cost. */ +/* For each vectorized loop, this var holds TRUE iff a non-memory vector + instruction is needed by the vectorization. */ +static bool rs6000_vect_nonmem; + static void * rs6000_init_cost (struct loop *loop_info) { @@ -5881,6 +5885,7 @@ rs6000_init_cost (struct loop *loop_info) data->cost[vect_prologue] = 0; data->cost[vect_body] = 0; data->cost[vect_epilogue] = 0; + rs6000_vect_nonmem = false; return data; } @@ -5907,6 +5912,15 @@ rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, retval = (unsigned) (count * stmt_cost); cost_data->cost[where] += retval; + + /* Check whether we're doing something other than just a copy loop. + Not all such loops may be profitably vectorized; see + rs6000_finish_cost. */ + if ((kind == vec_to_scalar || kind == vec_perm + || kind == vec_promote_demote || kind == vec_construct + || kind == scalar_to_vec) + || (where == vect_body && kind == vector_stmt)) + rs6000_vect_nonmem = true; } return retval; @@ -5923,6 +5937,19 @@ rs6000_finish_cost (void *data, unsigned *prologue_cost, if (cost_data->loop_info) rs6000_density_test (cost_data); + /* Don't vectorize minimum-vectorization-factor, simple copy loops + that require versioning for any reason. The vectorization is at + best a wash inside the loop, and the versioning checks make + profitability highly unlikely and potentially quite harmful. */ + if (cost_data->loop_info) + { + loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info); + if (!rs6000_vect_nonmem + && LOOP_VINFO_VECT_FACTOR (vec_info) == 2 + && LOOP_REQUIRES_VERSIONING (vec_info)) + cost_data->cost[vect_body] += 10000; + } + *prologue_cost = cost_data->cost[vect_prologue]; *body_cost = cost_data->cost[vect_body]; *epilogue_cost = cost_data->cost[vect_epilogue]; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5cd1286..bec90e3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-05-05 Bill Schmidt + + * gcc.target/powerpc/versioned-copy-loop.c: New file. + 2017-05-05 Michael Meissner PR target/79038 diff --git a/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c b/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c new file mode 100644 index 0000000..bbfd165 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ + +/* Verify that a pure copy loop with a vectorization factor of two + that requires alignment will not be vectorized. See the cost + model hooks in rs6000.c. */ + +typedef long unsigned int size_t; +typedef unsigned char uint8_t; + +extern void *memcpy (void *__restrict __dest, const void *__restrict __src, + size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ ((__nonnull__ (1, 2))); + +void foo (void *dstPtr, const void *srcPtr, void *dstEnd) +{ + uint8_t *d = (uint8_t*)dstPtr; + const uint8_t *s = (const uint8_t*)srcPtr; + uint8_t* const e = (uint8_t*)dstEnd; + + do + { + memcpy (d, s, 8); + d += 8; + s += 8; + } + while (d < e); +} + +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ -- 2.7.4