From 58f2fb5ca16a39f3af10dee718d87335b3cd5222 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Wed, 1 Jun 2016 20:09:35 +0000 Subject: [PATCH] re PR target/71186 (PowerPC64: Autovectorised code hits ICE with -O3 -mpower9 -mlra) [gcc] 2016-05-31 Michael Meissner PR target/71186 * config/rs6000/vsx.md (xxspltib_<mode>_nosplit): Add alternatives for loading up all 0's or all 1's. [gcc/testsuite] 2016-05-31 Michael Meissner PR target/71186 * gcc.target/powerpc/pr71186.c: New test. Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (.../svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 236935) +++ gcc/config/rs6000/vsx.md (.../gcc/config/rs6000) (working copy) @@ -776,8 +776,8 @@ (define_insn "xxspltib_v16qi" [(set_attr "type" "vecperm")]) (define_insn "xxspltib_<mode>_nosplit" - [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa") - (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "wE"))] + [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa") + (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))] "TARGET_P9_VECTOR" { rtx op1 = operands[1]; [gcc] 2016-05-31 Michael Meissner * config/rs6000/vsx.md (vsx_splat_<mode>, V2DI/V2DF): Simplify alternatives, eliminating preferred register class. Add support for the MTVSRDD instruction in ISA 3.0. (vsx_splat_v4si_internal): Use splat_input_operand instead of reg_or_indexed_operand. (vsx_splat_v4sf_internal): Likewise. [gcc/testsuite] 2016-05-31 Michael Meissner * gcc.target/powerpc/p9-splat-4.c: New test. 
From-SVN: r237006 --- gcc/ChangeLog | 15 +++++++++++++ gcc/config/rs6000/vsx.md | 19 +++++++--------- gcc/testsuite/ChangeLog | 9 ++++++++ gcc/testsuite/gcc.target/powerpc/p9-splat-4.c | 10 +++++++++ gcc/testsuite/gcc.target/powerpc/pr71186.c | 32 +++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/p9-splat-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr71186.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 45ae2f9..8fb0bab 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2016-05-31 Michael Meissner + + * config/rs6000/vsx.md (vsx_splat_<mode>, V2DI/V2DF): Simplify + alternatives, eliminating preferred register class. Add support + for the MTVSRDD instruction in ISA 3.0. + (vsx_splat_v4si_internal): Use splat_input_operand instead of + reg_or_indexed_operand. + (vsx_splat_v4sf_internal): Likewise. + +2016-05-31 Michael Meissner + + PR target/71186 + * config/rs6000/vsx.md (xxspltib_<mode>_nosplit): Add alternatives + for loading up all 0's or all 1's. + 2016-06-01 Thomas Preud'homme * doc/sourcebuild.texi (arm_acq_rel): Document new effective target. 
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 2b6963b..58e1cb5 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -776,8 +776,8 @@ [(set_attr "type" "vecperm")]) (define_insn "xxspltib_<mode>_nosplit" - [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa") - (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "wE"))] + [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa") + (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))] "TARGET_P9_VECTOR" { rtx op1 = operands[1]; @@ -2384,18 +2384,15 @@ ;; V2DF/V2DI splat (define_insn "vsx_splat_<mode>" - [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,wd,wd,?,?,?") + [(set (match_operand:VSX_D 0 "vsx_register_operand" "=,,we") (vec_duplicate:VSX_D - (match_operand: 1 "splat_input_operand" ",f,Z,,,Z")))] + (match_operand: 1 "splat_input_operand" ",Z,b")))] "VECTOR_MEM_VSX_P (<MODE>mode)" "@ xxpermdi %x0,%x1,%x1,0 - xxpermdi %x0,%x1,%x1,0 lxvdsx %x0,%y1 - xxpermdi %x0,%x1,%x1,0 - xxpermdi %x0,%x1,%x1,0 - lxvdsx %x0,%y1" - [(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")]) + mtvsrdd %x0,%1,%1" + [(set_attr "type" "vecperm,vecload,mftgpr")]) ;; V4SI splat (ISA 3.0) ;; When SI's are allowed in VSX registers, add XXSPLTW support @@ -2414,7 +2411,7 @@ (define_insn "*vsx_splat_v4si_internal" [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa") (vec_duplicate:V4SI - (match_operand:SI 1 "reg_or_indexed_operand" "r,Z")))] + (match_operand:SI 1 "splat_input_operand" "r,Z")))] "TARGET_P9_VECTOR" "@ mtvsrws %x0,%1 @@ -2425,7 +2422,7 @@ (define_insn_and_split "*vsx_splat_v4sf_internal" [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa") (vec_duplicate:V4SF - (match_operand:SF 1 "reg_or_indexed_operand" "Z,wy,r")))] + (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))] "TARGET_P9_VECTOR" "@ lxvwsx %x0,%y1 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7ee77b6..6d5ee16 100644 --- 
a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2016-05-31 Michael Meissner + + * gcc.target/powerpc/p9-splat-4.c: New test. + +2016-05-31 Michael Meissner + + PR target/71186 + * gcc.target/powerpc/pr71186.c: New test. + 2016-06-01 Jerry DeLisle PR fortran/52393 diff --git a/gcc/testsuite/gcc.target/powerpc/p9-splat-4.c b/gcc/testsuite/gcc.target/powerpc/p9-splat-4.c new file mode 100644 index 0000000..d643324 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p9-splat-4.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ + +#include <altivec.h> + +vector long long foo (long long a) { return (vector long long) { a, a }; } + +/* { dg-final { scan-assembler "mtvsrdd" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr71186.c b/gcc/testsuite/gcc.target/powerpc/pr71186.c new file mode 100644 index 0000000..22762cc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr71186.c @@ -0,0 +1,32 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ + +static unsigned short x[(16384/sizeof(unsigned short))] __attribute__ ((aligned (16))); +static unsigned short y[(16384/sizeof(unsigned short))] __attribute__ ((aligned (16))); +static unsigned short a; + +void obfuscate(void *a, ...); + +static void __attribute__((noinline)) do_one(void) +{ + unsigned long i; + + obfuscate(x, y, &a); + + for (i = 0; i < (16384/sizeof(unsigned short)); i++) + y[i] = a * x[i]; + + obfuscate(x, y, &a); +} + +int main(void) +{ + unsigned long i; + + for (i = 0; i < 1000000; i++) + do_one(); + + return 0; +} -- 2.7.4