From a93cdc5c6f1d56226c3ef7b69423a4074783de34 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 7 Oct 2016 11:33:47 +0200 Subject: [PATCH] re PR tree-optimization/77664 (Missed optimization: signed int >= 0 && < unsigned short) PR tree-optimization/77664 * tree-ssa-reassoc.c (update_range_test): Also clear low and high for the other ranges. (optimize_range_tests_diff): Fix up formatting. (optimize_range_tests_var_bound): New function. (optimize_range_tests): Use it. * gcc.dg/tree-ssa/pr77664.c: New test. * gcc.dg/pr77664.c: New test. From-SVN: r240858 --- gcc/ChangeLog | 9 ++ gcc/testsuite/ChangeLog | 8 +- gcc/testsuite/gcc.dg/pr77664.c | 55 +++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr77664.c | 49 ++++++++ gcc/tree-ssa-reassoc.c | 203 +++++++++++++++++++++++++++++++- 5 files changed, 319 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr77664.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr77664.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 14b298c..1be7817 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2016-10-07 Jakub Jelinek + + PR tree-optimization/77664 + * tree-ssa-reassoc.c (update_range_test): Also clear low and high + for the other ranges. + (optimize_range_tests_diff): Fix up formatting. + (optimize_range_tests_var_bound): New function. + (optimize_range_tests): Use it. + 2016-10-07 Martin Liska * coverage.c (build_gcov_exit_decl): Fix priority what diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0728e02..5721cda 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,4 +1,10 @@ -2016-10-06 Louis Krupp +2016-10-07 Jakub Jelinek + + PR tree-optimization/77664 + * gcc.dg/tree-ssa/pr77664.c: New test. + * gcc.dg/pr77664.c: New test. + +2016-10-06 Louis Krupp * gfortran.dg/pr69955.f90: New test. diff --git a/gcc/testsuite/gcc.dg/pr77664.c b/gcc/testsuite/gcc.dg/pr77664.c new file mode 100644 index 0000000..bcf7b88 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr77664.c @@ -0,0 +1,55 @@ +/* PR tree-optimization/77664 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include "tree-ssa/pr77664.c" + +int cnt; + +__attribute__((noinline, noclone)) void +foo (void) +{ + cnt++; +} + +int +main () +{ + fn1 (65534U, 65535U, 7); if (cnt != 1) __builtin_abort (); + fn1 (65534U, 65535U, 0); if (cnt != 1) __builtin_abort (); + fn1 (65535U, 65535U, 1); if (cnt != 1) __builtin_abort (); + fn1 (0, 65535U, 1); if (cnt != 2) __builtin_abort (); + fn1 (-1, 65535U, 1); if (cnt != 2) __builtin_abort (); + fn1 (0, 0, 1); if (cnt != 2) __builtin_abort (); + fn2 (65534U, 65535U, 7); if (cnt != 3) __builtin_abort (); + fn2 (65534U, 65535U, 0); if (cnt != 3) __builtin_abort (); + fn2 (65535U, 65535U, 0); if (cnt != 4) __builtin_abort (); + fn2 (0, 65535U, 0); if (cnt != 4) __builtin_abort (); + fn2 (-1, 65535U, 0); if (cnt != 5) __builtin_abort (); + fn2 (0, 0, 0); if (cnt != 6) __builtin_abort (); + fn3 (-1, 65534U); if (cnt != 7) __builtin_abort (); + fn3 (0, 65534U); if (cnt != 7) __builtin_abort (); + fn3 (65534U, 65534U); if (cnt != 7) __builtin_abort (); + fn3 (65535U, 65534U); if (cnt != 8) __builtin_abort (); + fn3 (0, 0); if (cnt != 8) __builtin_abort (); + fn3 (1, 0); if (cnt != 9) __builtin_abort (); + fn4 (-1, 65534U); if (cnt != 9) __builtin_abort (); + fn4 (0, 65534U); if (cnt != 10) __builtin_abort (); + fn4 (65534U, 65534U); if (cnt != 11) __builtin_abort (); + fn4 (65535U, 65534U); if (cnt != 11) __builtin_abort (); + fn4 (0, 0); if (cnt != 12) __builtin_abort (); + fn4 (1, 0); if (cnt != 12) __builtin_abort (); + fn5 (-1, 65534U); if (cnt != 13) __builtin_abort (); + fn5 (0, 65534U); if (cnt != 13) __builtin_abort (); + fn5 (65534U, 65534U); if (cnt != 13) __builtin_abort (); + fn5 (65535U, 65534U); if (cnt != 14) __builtin_abort (); + fn5 (0, 0); if (cnt != 14) __builtin_abort (); + fn5 (1, 0); if (cnt != 15) __builtin_abort (); + fn6 (-1, 65534U); if (cnt != 15) __builtin_abort (); + fn6 (0, 65534U); if (cnt != 16) __builtin_abort (); + fn6 (65534U, 65534U); if (cnt != 17) __builtin_abort (); + fn6 (65535U, 65534U); if (cnt != 17) __builtin_abort (); + fn6 (0, 0); if (cnt != 18) __builtin_abort (); + fn6 (1, 0); if (cnt != 18) __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr77664.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77664.c new file mode 100644 index 0000000..e98945f --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr77664.c @@ -0,0 +1,49 @@ +/* PR tree-optimization/77664 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-reassoc1-details" } */ + +extern void foo (void); + +/* { dg-final { scan-tree-dump-times "Optimizing range test \[^\n\r]* and comparison" 6 "reassoc1" } } */ + +__attribute__((noinline, noclone)) void +fn1 (long long int a, unsigned short b, int c) +{ + if (a >= 0 && c && a < b) + foo (); +} + +__attribute__((noinline, noclone)) void +fn2 (long long int a, unsigned short b, int c) +{ + if (a < 0 || c || a >= b) + foo (); +} + +__attribute__((noinline, noclone)) void +fn3 (long long int a, unsigned short b) +{ + if (a < 0 || b < a) + foo (); +} + +__attribute__((noinline, noclone)) void +fn4 (long long int a, unsigned short b) +{ + if (a <= b && a >= 0) + foo (); +} + +__attribute__((noinline, noclone)) void +fn5 (long long int a, unsigned short b) +{ + if (a < 0 | a > b) + foo (); +} + +__attribute__((noinline, noclone)) void +fn6 (long long int a, unsigned short b) +{ + if (b >= a & a >= 0) + foo (); +} diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c index d94ff70..c5b36ef 100644 --- a/gcc/tree-ssa-reassoc.c +++ b/gcc/tree-ssa-reassoc.c @@ -2428,6 +2428,8 @@ update_range_test (struct range_entry *range, struct range_entry *otherrange, else oe->op = error_mark_node; range->exp = NULL_TREE; + range->low = NULL_TREE; + range->high = NULL_TREE; } return true; } @@ -2485,10 +2487,10 @@ optimize_range_tests_xor (enum tree_code opcode, tree type, ((X - 43U) & ~(75U - 43U)) <= 3U. */ static bool optimize_range_tests_diff (enum tree_code opcode, tree type, - tree lowi, tree lowj, tree highi, tree highj, - vec *ops, - struct range_entry *rangei, - struct range_entry *rangej) + tree lowi, tree lowj, tree highi, tree highj, + vec *ops, + struct range_entry *rangei, + struct range_entry *rangej) { tree tem1, tem2, mask; /* Check highi - lowi == highj - lowj. */ @@ -2829,6 +2831,197 @@ optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length, return any_changes; } +/* Attempt to optimize for signed a and b where b is known to be >= 0: + a >= 0 && a < b into (unsigned) a < (unsigned) b + a >= 0 && a <= b into (unsigned) a <= (unsigned) b */ + +static bool +optimize_range_tests_var_bound (enum tree_code opcode, int first, int length, + vec *ops, + struct range_entry *ranges) +{ + int i; + bool any_changes = false; + hash_map *map = NULL; + + for (i = first; i < length; i++) + { + if (ranges[i].exp == NULL_TREE || !ranges[i].in_p) + continue; + + tree type = TREE_TYPE (ranges[i].exp); + if (!INTEGRAL_TYPE_P (type) + || TYPE_UNSIGNED (type) + || ranges[i].low == NULL_TREE + || !integer_zerop (ranges[i].low) + || ranges[i].high != NULL_TREE) + continue; + /* EXP >= 0 here. */ + if (map == NULL) + map = new hash_map ; + map->put (ranges[i].exp, i); + } + + if (map == NULL) + return false; + + for (i = 0; i < length; i++) + { + if (ranges[i].low == NULL_TREE + || ranges[i].high == NULL_TREE + || !integer_zerop (ranges[i].low) + || !integer_zerop (ranges[i].high)) + continue; + + gimple *stmt; + tree_code ccode; + tree rhs1, rhs2; + if (ranges[i].exp) + { + stmt = SSA_NAME_DEF_STMT (ranges[i].exp); + if (!is_gimple_assign (stmt)) + continue; + ccode = gimple_assign_rhs_code (stmt); + rhs1 = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + } + else + { + operand_entry *oe = (*ops)[ranges[i].idx]; + stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)); + if (gimple_code (stmt) != GIMPLE_COND) + continue; + ccode = gimple_cond_code (stmt); + rhs1 = gimple_cond_lhs (stmt); + rhs2 = gimple_cond_rhs (stmt); + } + + if (TREE_CODE (rhs1) != SSA_NAME + || rhs2 == NULL_TREE + || TREE_CODE (rhs2) != SSA_NAME) + continue; + + switch (ccode) + { + case GT_EXPR: + case GE_EXPR: + if (!ranges[i].in_p) + std::swap (rhs1, rhs2); + ccode = swap_tree_comparison (ccode); + break; + case LT_EXPR: + case LE_EXPR: + if (ranges[i].in_p) + std::swap (rhs1, rhs2); + break; + default: + continue; + } + + int *idx = map->get (rhs1); + if (idx == NULL) + continue; + + wide_int nz = get_nonzero_bits (rhs2); + if (wi::neg_p (nz)) + continue; + + /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0 + and RHS2 is known to be RHS2 >= 0. */ + tree utype = unsigned_type_for (TREE_TYPE (rhs1)); + + enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; + if ((ranges[*idx].strict_overflow_p + || ranges[i].strict_overflow_p) + && issue_strict_overflow_warning (wc)) + warning_at (gimple_location (stmt), OPT_Wstrict_overflow, + "assuming signed overflow does not occur " + "when simplifying range test"); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + struct range_entry *r = &ranges[*idx]; + fprintf (dump_file, "Optimizing range test "); + print_generic_expr (dump_file, r->exp, 0); + fprintf (dump_file, " +["); + print_generic_expr (dump_file, r->low, 0); + fprintf (dump_file, ", "); + print_generic_expr (dump_file, r->high, 0); + fprintf (dump_file, "] and comparison "); + print_generic_expr (dump_file, rhs1, 0); + fprintf (dump_file, " %s ", op_symbol_code (ccode)); + print_generic_expr (dump_file, rhs2, 0); + fprintf (dump_file, "\n into ("); + print_generic_expr (dump_file, utype, 0); + fprintf (dump_file, ") "); + print_generic_expr (dump_file, rhs1, 0); + fprintf (dump_file, " %s (", op_symbol_code (ccode)); + print_generic_expr (dump_file, utype, 0); + fprintf (dump_file, ") "); + print_generic_expr (dump_file, rhs2, 0); + fprintf (dump_file, "\n"); + } + + if (ranges[i].in_p) + std::swap (rhs1, rhs2); + + unsigned int uid = gimple_uid (stmt); + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1); + gimple_set_uid (g, uid); + rhs1 = gimple_assign_lhs (g); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2); + gimple_set_uid (g, uid); + rhs2 = gimple_assign_lhs (g); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + if (tree_swap_operands_p (rhs1, rhs2, false)) + { + std::swap (rhs1, rhs2); + ccode = swap_tree_comparison (ccode); + } + if (gimple_code (stmt) == GIMPLE_COND) + { + gcond *c = as_a (stmt); + gimple_cond_set_code (c, ccode); + gimple_cond_set_lhs (c, rhs1); + gimple_cond_set_rhs (c, rhs2); + update_stmt (stmt); + } + else + { + g = gimple_build_assign (make_ssa_name (TREE_TYPE (ranges[i].exp)), + ccode, rhs1, rhs2); + gimple_set_uid (g, uid); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + ranges[i].exp = gimple_assign_lhs (g); + (*ops)[ranges[i].idx]->op = ranges[i].exp; + } + ranges[i].strict_overflow_p = false; + operand_entry *oe = (*ops)[ranges[*idx].idx]; + /* Now change all the other range test immediate uses, so that + those tests will be optimized away. */ + if (opcode == ERROR_MARK) + { + if (oe->op) + oe->op = build_int_cst (TREE_TYPE (oe->op), + oe->rank == BIT_IOR_EXPR ? 0 : 1); + else + oe->op = (oe->rank == BIT_IOR_EXPR + ? boolean_false_node : boolean_true_node); + } + else + oe->op = error_mark_node; + ranges[*idx].exp = NULL_TREE; + ranges[*idx].low = NULL_TREE; + ranges[*idx].high = NULL_TREE; + any_changes = true; + } + + delete map; + return any_changes; +} + /* Optimize range tests, similarly how fold_range_test optimizes it on trees. The tree code for the binary operation between all the operands is OPCODE. @@ -2917,6 +3110,8 @@ optimize_range_tests (enum tree_code opcode, if (lshift_cheap_p (optimize_function_for_speed_p (cfun))) any_changes |= optimize_range_tests_to_bit_test (opcode, first, length, ops, ranges); + any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops, + ranges); if (any_changes && opcode != ERROR_MARK) { -- 2.7.4