From 28002f1ad17bb0c19c5371ecf2de364303eff0d3 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 9 Feb 2015 11:51:05 +0000 Subject: [PATCH] re PR tree-optimization/54000 (Performance breakdown for gcc-4.{6,7} vs. gcc-4.5 using std::vector in matrix vector multiplication (IVopts / inliner)) 2015-02-09 Richard Biener PR tree-optimization/54000 * tree-ssa-loop-ivopts.c: Include tree-vectorizer.h. (struct ivopts_data): Add loop_loc member. (tree_ssa_iv_optimize_loop): Dump loop location. (create_new_ivs): Likewise, also dump number of IVs generated. * g++.dg/tree-ssa/ivopts-3.C: New testcase. From-SVN: r220536 --- gcc/ChangeLog | 8 ++++ gcc/testsuite/ChangeLog | 5 +++ gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C | 76 ++++++++++++++++++++++++++++++++ gcc/tree-ssa-loop-ivopts.c | 16 ++++++- 4 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 202a815..b0be734 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2015-02-09 Richard Biener + + PR tree-optimization/54000 + * tree-ssa-loop-ivopts.c: Include tree-vectorizer.h. + (struct ivopts_data): Add loop_loc member. + (tree_ssa_iv_optimize_loop): Dump loop location. + (create_new_ivs): Likewise, also dump number of IVs generated. + 2015-02-09 Martin Liska * ipa-icf.c (sem_item_optimizer::register_hooks): Register hooks diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 67cb4c95..7a8734b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-02-09 Richard Biener + + PR tree-optimization/54000 + * g++.dg/tree-ssa/ivopts-3.C: New testcase. + 2015-02-09 Tom de Vries * gcc.dg/graphite/scop-19.c: Fix scan-tree-dump for fpic. 
diff --git a/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C b/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C new file mode 100644 index 0000000..d0178b3 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/ivopts-3.C @@ -0,0 +1,76 @@ +// { dg-do compile } +// { dg-options "-O2 -fdump-tree-ivopts-details" } + +class MinimalVec3 +{ +protected: + double coords[3]; + +public: + + MinimalVec3( ) { + for ( int i = 0; i < 3; ++i ) + coords[i] = 0.; + } + + inline const double& operator[] ( int I ) const { + return coords[I]; + } +}; + +class MinimalVector +{ +protected: + double *_pData; + double stuff; + +public: + explicit MinimalVector ( int length ) { + _pData = new double[length]; + for (int i = 0; i < length; ++i) _pData[i] = 0.; + } + + inline double& operator[] ( int I ) { + return _pData[I]; + } + + inline const double& operator[] ( int I ) const { + return _pData[I]; + } +}; + + +int main ( int , char** ) { + int w = ( 1 << 7 )+1; + int wsqr = w*w; + int wcub = w*w*w; + + MinimalVec3 * rows[9]; + for ( int i = 0; i < 9; ++i ) { + rows[i] = new MinimalVec3[wcub]; + } + + MinimalVector img ( wcub ), res ( wcub ); + + for ( int c = 0; c < 1000; ++c ) { + + for ( int i = 1; i < w-1; ++i ) + for ( int j = 0; j < 3; ++j ) { + + for ( int k = 1; k < w - 1; ++k ) + for ( int l = 0; l < 3; ++l ) { + + for ( int m = 1; m < w - 1; ++m ) + for ( int n = 0; n < 3; ++n ) + res[i*wsqr + k*w + m] += img[( i + j - 1 ) *wsqr + ( k + l - 1 ) *w + m + n - 1] * rows[j*3 + l][i*wsqr + k*w + m][n]; + + } + } + } + return 0; +} + +// Verify that on x86_64 and i?86 we use a single IV for the innermost loop + +// { dg-final { scan-tree-dump "Selected IV set for loop \[0-9\]* at \[^ \]*:64, 1 IVs" "ivopts" { target x86_64-*-* i?86-*-* } } } +// { dg-final { cleanup-tree-dump "ivopts" } } diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index a701636..438ff96 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -138,6 +138,7 @@ along with GCC; see the file 
COPYING3. If not see #include "tree-ssa-propagate.h" #include "tree-ssa-address.h" #include "builtins.h" +#include "tree-vectorizer.h" /* FIXME: Expressions are expanded to RTL in this pass to determine the cost of different addressing modes. This should be moved to a TBD @@ -318,6 +319,7 @@ struct ivopts_data { /* The currently optimized loop. */ struct loop *current_loop; + source_location loop_loc; /* Numbers of iterations for all exits of the current loop. */ hash_map *niters; @@ -6340,7 +6342,12 @@ create_new_ivs (struct ivopts_data *data, struct iv_ca *set) if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, "\nSelected IV set: \n"); + fprintf (dump_file, "Selected IV set for loop %d", + data->current_loop->num); + if (data->loop_loc != UNKNOWN_LOCATION) + fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), + LOCATION_LINE (data->loop_loc)); + fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands)); EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) { cand = iv_cand (data, i); @@ -6943,11 +6950,16 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) gcc_assert (!data->niters); data->current_loop = loop; + data->loop_loc = find_loop_location (loop); data->speed = optimize_loop_for_speed_p (loop); if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, "Processing loop %d\n", loop->num); + fprintf (dump_file, "Processing loop %d", loop->num); + if (data->loop_loc != UNKNOWN_LOCATION) + fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), + LOCATION_LINE (data->loop_loc)); + fprintf (dump_file, "\n"); if (exit) { -- 2.7.4