From 287522613d661b4c5ba8403b051eb470c1674cba Mon Sep 17 00:00:00 2001 From: Marc Glisse Date: Mon, 10 Aug 2020 12:50:42 +0200 Subject: [PATCH] Simplify X * C1 == C2 with wrapping overflow Odd numbers are invertible in Z / 2^n Z, so X * C1 == C2 can be rewritten as X == C2 * inv(C1) when overflow wraps. mod_inv should probably be updated to better match the other wide_int functions, but that's a separate issue. 2020-08-10 Marc Glisse PR tree-optimization/95433 * match.pd (X * C1 == C2): Handle wrapping overflow. * expr.c (maybe_optimize_mod_cmp): Qualify call to mod_inv. (mod_inv): Move... * wide-int.cc (mod_inv): ... here. * wide-int.h (mod_inv): Declare it. * gcc.dg/tree-ssa/pr95433-2.c: New file. --- gcc/expr.c | 34 +------------------------------ gcc/match.pd | 19 +++++++++++++++-- gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c | 15 ++++++++++++++ gcc/wide-int.cc | 33 ++++++++++++++++++++++++++++++ gcc/wide-int.h | 2 ++ 5 files changed, 68 insertions(+), 35 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c diff --git a/gcc/expr.c b/gcc/expr.c index a150fa0..ebf0c9e 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -11859,38 +11859,6 @@ string_constant (tree arg, tree *ptr_offset, tree *mem_size, tree *decl) return init; } -/* Compute the modular multiplicative inverse of A modulo M - using extended Euclid's algorithm. Assumes A and M are coprime. */ -static wide_int -mod_inv (const wide_int &a, const wide_int &b) -{ - /* Verify the assumption. */ - gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1)); - - unsigned int p = a.get_precision () + 1; - gcc_checking_assert (b.get_precision () + 1 == p); - wide_int c = wide_int::from (a, p, UNSIGNED); - wide_int d = wide_int::from (b, p, UNSIGNED); - wide_int x0 = wide_int::from (0, p, UNSIGNED); - wide_int x1 = wide_int::from (1, p, UNSIGNED); - - if (wi::eq_p (b, 1)) - return wide_int::from (1, p, UNSIGNED); - - while (wi::gt_p (c, 1, UNSIGNED)) - { - wide_int t = d; - wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d); - c = t; - wide_int s = x0; - x0 = wi::sub (x1, wi::mul (q, x0)); - x1 = s; - } - if (wi::lt_p (x1, 0, SIGNED)) - x1 += d; - return x1; -} - /* Optimize x % C1 == C2 for signed modulo if C1 is a power of two and C2 is non-zero and C3 ((1<<(prec-1)) | (C1 - 1)): for C2 > 0 to x & C3 == C2 @@ -12101,7 +12069,7 @@ maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1) w = wi::lrshift (w, shift); wide_int a = wide_int::from (w, prec + 1, UNSIGNED); wide_int b = wi::shifted_mask (prec, 1, false, prec + 1); - wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED); + wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED); tree c3 = wide_int_to_tree (type, m); tree c5 = NULL_TREE; wide_int d, e; diff --git a/gcc/match.pd b/gcc/match.pd index 7e5c5a6..c3b8816 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3828,7 +3828,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cmp @0 @2)))))) /* For integral types with undefined overflow fold - x * C1 == C2 into x == C2 / C1 or false. */ + x * C1 == C2 into x == C2 / C1 or false. + If overflow wraps and C1 is odd, simplify to x == C2 / C1 in the ring + Z / 2^n Z. */ (for cmp (eq ne) (simplify (cmp (mult @0 INTEGER_CST@1) INTEGER_CST@2) @@ -3839,7 +3841,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (wi::multiple_of_p (wi::to_widest (@2), wi::to_widest (@1), TYPE_SIGN (TREE_TYPE (@0)), ")) (cmp @0 { wide_int_to_tree (TREE_TYPE (@0), quot); }) - { constant_boolean_node (cmp == NE_EXPR, type); }))))) + { constant_boolean_node (cmp == NE_EXPR, type); })) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) + && (wi::bit_and (wi::to_wide (@1), 1) == 1)) + (cmp @0 + { + tree itype = TREE_TYPE (@0); + int p = TYPE_PRECISION (itype); + wide_int m = wi::one (p + 1) << p; + wide_int a = wide_int::from (wi::to_wide (@1), p + 1, UNSIGNED); + wide_int i = wide_int::from (wi::mod_inv (a, m), + p, TYPE_SIGN (itype)); + wide_int_to_tree (itype, wi::mul (i, wi::to_wide (@2))); + }))))) /* Simplify comparison of something with itself. For IEEE floating-point, we can only do some of these simplifications. */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c new file mode 100644 index 0000000..c830a3d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fwrapv -fdump-tree-gimple" } */ + +typedef __INT32_TYPE__ int32_t; +typedef unsigned __INT32_TYPE__ uint32_t; + +int e(int32_t x){return 3*x==5;} +int f(int32_t x){return 3*x==-5;} +int g(int32_t x){return -3*x==5;} +int h(int32_t x){return 7*x==3;} +int i(uint32_t x){return 7*x==3;} + +/* { dg-final { scan-tree-dump-times "== 1431655767" 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "== -1431655767" 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "== 613566757" 2 "gimple" } } */ diff --git a/gcc/wide-int.cc b/gcc/wide-int.cc index 03be007..f4d949c 100644 --- a/gcc/wide-int.cc +++ b/gcc/wide-int.cc @@ -2223,6 +2223,39 @@ wi::round_up_for_mask (const wide_int &val, const wide_int &mask) return (val | tmp) & -tmp; } +/* Compute the modular multiplicative inverse of A modulo B + using extended Euclid's algorithm. Assumes A and B are coprime, + and that A and B have the same precision. */ +wide_int +wi::mod_inv (const wide_int &a, const wide_int &b) +{ + /* Verify the assumption. */ + gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1)); + + unsigned int p = a.get_precision () + 1; + gcc_checking_assert (b.get_precision () + 1 == p); + wide_int c = wide_int::from (a, p, UNSIGNED); + wide_int d = wide_int::from (b, p, UNSIGNED); + wide_int x0 = wide_int::from (0, p, UNSIGNED); + wide_int x1 = wide_int::from (1, p, UNSIGNED); + + if (wi::eq_p (b, 1)) + return wide_int::from (1, p, UNSIGNED); + + while (wi::gt_p (c, 1, UNSIGNED)) + { + wide_int t = d; + wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d); + c = t; + wide_int s = x0; + x0 = wi::sub (x1, wi::mul (q, x0)); + x1 = s; + } + if (wi::lt_p (x1, 0, SIGNED)) + x1 += d; + return x1; +} + /* * Private utilities. */ diff --git a/gcc/wide-int.h b/gcc/wide-int.h index bb0d16b..39cd5b9 100644 --- a/gcc/wide-int.h +++ b/gcc/wide-int.h @@ -3389,6 +3389,8 @@ namespace wi wide_int round_down_for_mask (const wide_int &, const wide_int &); wide_int round_up_for_mask (const wide_int &, const wide_int &); + wide_int mod_inv (const wide_int &a, const wide_int &b); + template T mask (unsigned int, bool); -- 2.7.4