From 2af6dd77ea742d4ee911f466878624972929508a Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Thu, 2 Sep 2021 11:25:07 +0200
Subject: [PATCH] match.pd: Demote IFN_{ADD,SUB,MUL}_OVERFLOW operands [PR99591]

The overflow builtins work on infinite precision integers and then convert
to the result type's precision, so any argument promotions are useless.
The expand_arith_overflow expansion is able to demote the arguments itself
through get_range_pos_neg and get_min_precision calls and, if needed, promote
them to whatever mode it decides to perform the operations in, but any
promotions that it demotes have already been expanded by that point.
Normally combine would remove the useless sign or zero extensions when it
sees that their result is only used in a lowpart subreg, but typically those
lowpart subregs appear multiple times in the pattern so that they properly
describe the overflow behavior, and combine gives up, so we end up with e.g.
        movswl %si, %esi
        movswl %di, %edi
        imulw %si, %di
        seto %al
where both movswl insns are useless.

The following patch fixes it by demoting operands of the ifns (it only gets
rid of integral to integral conversions that increase precision).
While IFN_{ADD,MUL}_OVERFLOW are commutative and just one simplify would be
enough for them, IFN_SUB_OVERFLOW is not, therefore two simplifications are
needed.

2021-09-02 Jakub Jelinek

        PR tree-optimization/99591
        * match.pd: Demote operands of IFN_{ADD,SUB,MUL}_OVERFLOW if they
        were promoted.

        * gcc.target/i386/pr99591.c: New test.
        * gcc.target/i386/pr97950.c: Match or reject setb or jn?b instructions
        together with seta or jn?a.
--- gcc/match.pd | 15 +++++++++++++++ gcc/testsuite/gcc.target/i386/pr97950.c | 4 ++-- gcc/testsuite/gcc.target/i386/pr99591.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr99591.c diff --git a/gcc/match.pd b/gcc/match.pd index f421c74..f920bc4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5587,6 +5587,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (with { tree t = TREE_TYPE (@0), cpx = build_complex_type (t); } (cmp (imagpart (IFN_MUL_OVERFLOW:cpx @0 @1)) { build_zero_cst (t); }))))) +/* Demote operands of IFN_{ADD,SUB,MUL}_OVERFLOW. */ +(for ovf (IFN_ADD_OVERFLOW IFN_SUB_OVERFLOW IFN_MUL_OVERFLOW) + (simplify + (ovf (convert@2 @0) @1) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@2)) + && TYPE_PRECISION (TREE_TYPE (@2)) > TYPE_PRECISION (TREE_TYPE (@0))) + (ovf @0 @1))) + (simplify + (ovf @1 (convert@2 @0)) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@2)) + && TYPE_PRECISION (TREE_TYPE (@2)) > TYPE_PRECISION (TREE_TYPE (@0))) + (ovf @1 @0)))) + /* Simplification of math builtins. These rules must all be optimizations as well as IL simplifications. 
If there is a possibility that the new form could be a pessimization, the rule should go in the canonicalization diff --git a/gcc/testsuite/gcc.target/i386/pr97950.c b/gcc/testsuite/gcc.target/i386/pr97950.c index 277311d..185d733 100644 --- a/gcc/testsuite/gcc.target/i386/pr97950.c +++ b/gcc/testsuite/gcc.target/i386/pr97950.c @@ -1,10 +1,10 @@ /* PR target/95950 */ /* { dg-do compile } */ /* { dg-options "-O2 -mtune=generic" } */ -/* { dg-final { scan-assembler-times "\tseta\t" 4 } } */ +/* { dg-final { scan-assembler-times "\tset\[ab]\t" 4 } } */ /* { dg-final { scan-assembler-times "\tseto\t" 16 } } */ /* { dg-final { scan-assembler-times "\tsetc\t" 4 } } */ -/* { dg-final { scan-assembler-not "\tjn?a\t" } } */ +/* { dg-final { scan-assembler-not "\tjn?\[ab]\t" } } */ /* { dg-final { scan-assembler-not "\tjn?o\t" } } */ /* { dg-final { scan-assembler-not "\tjn?c\t" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr99591.c b/gcc/testsuite/gcc.target/i386/pr99591.c new file mode 100644 index 0000000..01d8485 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr99591.c @@ -0,0 +1,32 @@ +/* PR tree-optimization/99591 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not "\tmovs\[bw]l\t" } } */ + +int +foo (signed char a, signed char b) +{ + signed char r; + return __builtin_add_overflow (a, b, &r); +} + +int +bar (short a, short b) +{ + short r; + return __builtin_add_overflow (a, b, &r); +} + +int +baz (signed char a, signed char b) +{ + signed char r; + return __builtin_add_overflow ((int) a, (int) b, &r); +} + +int +qux (short a, short b) +{ + short r; + return __builtin_add_overflow ((int) a, (int) b, &r); +} -- 2.7.4