From 576714b309b330df0e80e34114bcdf0bba35e146 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 5 Jan 2021 16:35:22 +0100 Subject: [PATCH] phiopt: Optimize x < 0 ? ~y : y to (x >> 31) ^ y [PR96928] As requested in the PR, the one's complement abs can be done more efficiently without cmov or branching. Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize it in ifcvt, on x86_64 with -m32 we generate in the end the exact same code, but with -m64: movl %edi, %eax - notl %eax - cmpl %edi, %eax - cmovl %edi, %eax + sarl $31, %eax + xorl %edi, %eax ret 2021-01-05 Jakub Jelinek PR tree-optimization/96928 * tree-ssa-phiopt.c (xor_replacement): New function. (tree_ssa_phiopt_worker): Call it. * gcc.dg/tree-ssa/pr96928.c: New test. * gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1, instead of scanning rtl dump for ifcvt message check assembly for xor instruction. --- gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 ++++++++ .../gcc.target/i386/ifcvt-onecmpl-abs-1.c | 4 +- gcc/tree-ssa-phiopt.c | 108 +++++++++++++++++++++ 3 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c new file mode 100644 index 0000000..2091357 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c @@ -0,0 +1,38 @@ +/* PR tree-optimization/96928 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-phiopt2" } */ +/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ +/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ +/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */ +/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ +/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ + +int +foo (int a) +{ + return a < 0 ? ~a : a; +} + +int +bar (int a, int b) +{ + return a < 0 ? ~b : b; +} + +unsigned +baz (int a, unsigned int b) +{ + return a < 0 ? ~b : b; +} + +unsigned +qux (int a, unsigned int c) +{ + return a >= 0 ? ~c : c; +} + +int +corge (int a, int b) +{ + return a >= 0 ? b : ~b; +} diff --git a/gcc/testsuite/gcc.target/i386/ifcvt-onecmpl-abs-1.c b/gcc/testsuite/gcc.target/i386/ifcvt-onecmpl-abs-1.c index 195da3f..6e02dd7 100644 --- a/gcc/testsuite/gcc.target/i386/ifcvt-onecmpl-abs-1.c +++ b/gcc/testsuite/gcc.target/i386/ifcvt-onecmpl-abs-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-rtl-ce1" } */ +/* { dg-options "-O2" } */ /* Check code generation for one's complement version of abs */ @@ -10,4 +10,4 @@ int onecmplabs(int x) return x; } -/* { dg-final { scan-rtl-dump "succeeded through noce_try_abs" "ce1" } } */ +/* { dg-final { scan-assembler "\txor" } } */ diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index 7ab2888..bbc78af 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -62,6 +62,8 @@ static bool minmax_replacement (basic_block, basic_block, edge, edge, gimple *, tree, tree); static bool abs_replacement (basic_block, basic_block, edge, edge, gimple *, tree, tree); +static bool xor_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, edge, edge, gimple *, tree, tree); @@ -346,6 +348,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) cfgchanged = true; else if (!early_p + && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, e2, phi, arg0, arg1)) @@ -2098,6 +2103,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, return true; } +/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ + +static bool +xor_replacement (basic_block cond_bb, basic_block middle_bb, + edge e0 ATTRIBUTE_UNUSED, edge e1, + gimple *phi, tree arg0, tree arg1) +{ + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1))) + return false; + + /* OTHER_BLOCK must have only one executable statement which must have the + form arg0 = ~arg1 or arg1 = ~arg0. */ + + gimple *assign = last_and_only_stmt (middle_bb); + /* If we did not find the proper one's complement assignment, then we cannot + optimize. */ + if (assign == NULL) + return false; + + /* If we got here, then we have found the only executable statement + in OTHER_BLOCK. If it is anything other than arg = ~arg1 or + arg1 = ~arg0, then we cannot optimize. */ + if (!is_gimple_assign (assign)) + return false; + + if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR) + return false; + + tree lhs = gimple_assign_lhs (assign); + tree rhs = gimple_assign_rhs1 (assign); + + /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */ + if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0)) + return false; + + gimple *cond = last_stmt (cond_bb); + tree result = PHI_RESULT (phi); + + /* Only relationals comparing arg[01] against zero are interesting. */ + enum tree_code cond_code = gimple_cond_code (cond); + if (cond_code != LT_EXPR && cond_code != GE_EXPR) + return false; + + /* Make sure the conditional is x OP 0. */ + tree clhs = gimple_cond_lhs (cond); + if (TREE_CODE (clhs) != SSA_NAME + || !INTEGRAL_TYPE_P (TREE_TYPE (clhs)) + || TYPE_UNSIGNED (TREE_TYPE (clhs)) + || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1)) + || !integer_zerop (gimple_cond_rhs (cond))) + return false; + + /* We need to know which is the true edge and which is the false + edge so that we know if have xor or inverted xor. */ + edge true_edge, false_edge; + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); + + /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we + will need to invert the result. Similarly for LT_EXPR if + the false edge goes to OTHER_BLOCK. */ + edge e; + if (cond_code == GE_EXPR) + e = true_edge; + else + e = false_edge; + + bool invert = e->dest == middle_bb; + + result = duplicate_ssa_name (result, NULL); + + gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); + + int prec = TYPE_PRECISION (TREE_TYPE (clhs)); + gimple *new_stmt + = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs, + build_int_cst (integer_type_node, prec - 1)); + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + + if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs))) + { + new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), + NOP_EXPR, gimple_assign_lhs (new_stmt)); + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + } + lhs = gimple_assign_lhs (new_stmt); + + if (invert) + { + new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), + BIT_NOT_EXPR, rhs); + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + rhs = gimple_assign_lhs (new_stmt); + } + + new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs); + gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); + + replace_phi_edge_with_variable (cond_bb, e1, phi, result); + + /* Note that we optimized this PHI. */ + return true; +} + /* Auxiliary functions to determine the set of memory accesses which can't trap because they are preceded by accesses to the same memory portion. We do that for MEM_REFs, so we only need to track -- 2.7.4