From 0447929f11e6a3e1b076841712b90a8b6bc7d33a Mon Sep 17 00:00:00 2001 From: Xionghu Luo Date: Sun, 10 May 2020 21:06:20 -0500 Subject: [PATCH] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) Use determine_value_range to get value range info when folding conversions of expressions whose inner operation is PLUS_EXPR/MINUS_EXPR/MULT_EXPR, provided the operation does not overflow in an inner type with wrapping overflow behavior. E.g.: (long unsigned int)((unsigned int)n * 10 + 1) => (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1 With this patch for affine combination, load/store motion can detect that more address references are independent and promote some memory expressions to registers within the loop. PS: Replace the previous handling of "(T1)(X + CST)" as "(T1)X - (T1)(-CST)" with "(T1)X + (T1)CST" for wrapping overflow. Bootstrapped and regression tested on Power8-LE. gcc/ChangeLog 2020-05-11 Xiong Hu Luo PR tree-optimization/83403 * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with determine_value_range, Add fold conversion of MULT_EXPR, fix the previous PLUS_EXPR. gcc/testsuite/ChangeLog 2020-05-11 Xiong Hu Luo PR tree-optimization/83403 * gcc.dg/tree-ssa/pr83403-1.c: New test. * gcc.dg/tree-ssa/pr83403-2.c: New test. * gcc.dg/tree-ssa/pr83403.h: New header.
--- gcc/ChangeLog | 7 +++++++ gcc/testsuite/ChangeLog | 7 +++++++ gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30 ++++++++++++++++++++++++++++++ gcc/tree-affine.c | 24 ++++++++++++++---------- 6 files changed, 74 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b0f0413..d23ebff 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2020-05-11 Xiong Hu Luo + + PR tree-optimization/83403 + * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with + determine_value_range, Add fold conversion of MULT_EXPR, fix the + previous PLUS_EXPR. + 2020-05-10 Gerald Pfeifer * config/i386/i386-c.c (ix86_target_macros): Define _ILP32 and diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 791c62a..aa11ca3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2020-05-11 Xiong Hu Luo + + PR tree-optimization/83403 + * gcc.dg/tree-ssa/pr83403-1.c: New test. + * gcc.dg/tree-ssa/pr83403-2.c: New test. + * gcc.dg/tree-ssa/pr83403.h: New header. 
+ 2020-05-10 Harald Anlauf PR fortran/93499 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c new file mode 100644 index 0000000..748375b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ + +#define TYPE unsigned int + +#include "pr83403.h" + +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c new file mode 100644 index 0000000..ca2e6bb --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ + +#define TYPE int + +#include "pr83403.h" + +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h new file mode 100644 index 0000000..0da8a83 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h @@ -0,0 +1,30 @@ +__attribute__ ((noinline)) void +calculate (const double *__restrict__ A, const double *__restrict__ B, + double *__restrict__ C) +{ + TYPE m = 0; + TYPE n = 0; + TYPE k = 0; + + A = (const double *) __builtin_assume_aligned (A, 16); + B = (const double *) __builtin_assume_aligned (B, 16); + C = (double *) __builtin_assume_aligned (C, 16); + + for (n = 0; n < 9; n++) + { + for (m = 0; m < 10; m++) + { + C[(n * 10) + m] = 0.0; + } + + for (k = 0; k < 17; k++) + { +#pragma simd + for (m = 0; m < 10; m++) + { + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k]; + } + } + } +} + diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c index 0eb8db1..5620e6b 100644 --- a/gcc/tree-affine.c +++ b/gcc/tree-affine.c @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, wide_int minv, maxv; /* If inner type 
has wrapping overflow behavior, fold conversion for below case: - (T1)(X - CST) -> (T1)X - (T1)CST - if X - CST doesn't overflow by range information. Also handle - (T1)(X + CST) as (T1)(X - (-CST)). */ + (T1)(X *+- CST) -> (T1)X *+- (T1)CST + if X *+- CST doesn't overflow by range information. */ if (TYPE_UNSIGNED (itype) && TYPE_OVERFLOW_WRAPS (itype) - && TREE_CODE (op0) == SSA_NAME && TREE_CODE (op1) == INTEGER_CST - && icode != MULT_EXPR - && get_range_info (op0, &minv, &maxv) == VR_RANGE) + && determine_value_range (op0, &minv, &maxv) == VR_RANGE) { + wi::overflow_type overflow = wi::OVF_NONE; + signop sign = UNSIGNED; if (icode == PLUS_EXPR) - op1 = wide_int_to_tree (itype, -wi::to_wide (op1)); - if (wi::geu_p (minv, wi::to_wide (op1))) + wi::add (maxv, wi::to_wide (op1), sign, &overflow); + else if (icode == MULT_EXPR) + wi::mul (maxv, wi::to_wide (op1), sign, &overflow); + else + wi::sub (minv, wi::to_wide (op1), sign, &overflow); + + if (overflow == wi::OVF_NONE) { op0 = fold_convert (otype, op0); op1 = fold_convert (otype, op1); - return expr_to_aff_combination (comb, MINUS_EXPR, otype, - op0, op1); + return expr_to_aff_combination (comb, icode, otype, op0, + op1); } } } -- 2.7.4