From 09df85393ce5501a28359d159670fd05c4d70abf Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Thu, 28 May 2020 10:27:48 +0200
Subject: [PATCH] tree-optimization/95273 - more vectorizable_shift massaging

Covering all bases in vectorizable_shift is hard - this makes sure
to appropriately handle the case of PR95356 without breaking others.

2020-05-28  Richard Biener

	PR tree-optimization/95273
	PR tree-optimization/95356
	* tree-vect-stmts.c (vectorizable_shift): Adjust when and to
	what we set the vector type of the shift operand SLP node
	again.

	* gcc.target/i386/pr95356.c: New testcase.
---
 gcc/testsuite/gcc.target/i386/pr95356.c | 125 ++++++++++++++++++++++++++++++++
 gcc/tree-vect-stmts.c                   |   6 +-
 2 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95356.c

diff --git a/gcc/testsuite/gcc.target/i386/pr95356.c b/gcc/testsuite/gcc.target/i386/pr95356.c
new file mode 100644
index 0000000..fdd917b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95356.c
@@ -0,0 +1,125 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512dq" } */
+
+extern void abort (void);
+long long a[16];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 3;
+  a2 = a2 << 4;
+  a3 = a3 << 5;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2a (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2b (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << x;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;
+  a1 = a1 << x;
+  a2 = a2 << x;
+  a3 = a3 << x;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+int
+main ()
+{
+  a[0] = 4LL;
+  a[1] = 3LL;
+  a[2] = 2LL;
+  a[3] = 1LL;
+  f1 ();
+  if (a[0] != (4LL << 2) || a[1] != (3LL << 3)
+      || a[2] != (2LL << 4) || a[3] != (1LL << 5))
+    abort ();
+  f2 ();
+  if (a[0] != (4LL << 4) || a[1] != (3LL << 5)
+      || a[2] != (2LL << 6) || a[3] != (1LL << 7))
+    abort ();
+  f3 (3);
+  if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
+      || a[2] != (2LL << 9) || a[3] != (1LL << 10))
+    abort ();
+  f2a (3);
+  if (a[0] != (4LL << 10) || a[1] != (3LL << 10)
+      || a[2] != (2LL << 11) || a[3] != (1LL << 12))
+    abort ();
+  f2b (3);
+  if (a[0] != (4LL << 12) || a[1] != (3LL << 12)
+      || a[2] != (2LL << 14) || a[3] != (1LL << 14))
+    abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2f92bb5..ff335aa 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5792,7 +5792,11 @@ vectorizable_shift (vec_info *vinfo,
       if (slp_node
 	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
 	      || (!scalar_shift_arg
-		  && !vect_maybe_update_slp_op_vectype (slp_op1, vectype))))
+		  && (!incompatible_op1_vectype_p
+		      || dt[1] == vect_constant_def)
+		  && !vect_maybe_update_slp_op_vectype
+			(slp_op1,
+			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-- 
2.7.4