From c072fd236dc08f990bfcffd98b27f211a39bb404 Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Thu, 6 Aug 2020 09:15:25 +0100 Subject: [PATCH] x86_64: Integer min/max improvements. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch tweaks the way that min and max are expanded, so that the semantics of these operations are visible to the early RTL optimization passes, until split into explicit comparison and conditional move instructions. The good news is that i386.md already contains all of the required logic (many thanks to Richard Biener and Uros Bizjak), but this is currently only enabled for scalar-to-vector (STV) synthesis of min/max instructions. This change enables this functionality for all TARGET_CMOVE architectures for SImode, HImode and DImode. 2020-08-06 Roger Sayle Uroš Bizjak gcc/ChangeLog * config/i386/i386.md (MAXMIN_IMODE): No longer needed. (<code><mode>3): Support SWI248 and general_operand for second operand, when TARGET_CMOVE. (<code><mode>3_1 splitter): Optimize comparisons against 0, 1 and -1 to use "test" instead of "cmp". (*<code>di3_doubleword): Likewise, allow general_operand and enable on TARGET_CMOVE. (peephole2): Convert clearing a register after a flag setting instruction into an xor followed by the original flag setter. gcc/testsuite/ChangeLog * gcc.target/i386/minmax-8.c: New test. * gcc.target/i386/minmax-9.c: New test. * gcc.target/i386/minmax-10.c: New test. * gcc.target/i386/minmax-11.c: New test. 
--- gcc/config/i386/i386.md | 74 +++++++++++++++++++++++-------- gcc/testsuite/gcc.target/i386/minmax-10.c | 38 ++++++++++++++++ gcc/testsuite/gcc.target/i386/minmax-11.c | 12 +++++ gcc/testsuite/gcc.target/i386/minmax-8.c | 18 ++++++++ gcc/testsuite/gcc.target/i386/minmax-9.c | 23 ++++++++++ 5 files changed, 146 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/minmax-10.c create mode 100644 gcc/testsuite/gcc.target/i386/minmax-11.c create mode 100644 gcc/testsuite/gcc.target/i386/minmax-8.c create mode 100644 gcc/testsuite/gcc.target/i386/minmax-9.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b24a455..4e916bf 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -18809,45 +18809,68 @@ ;; min/max patterns -(define_mode_iterator MAXMIN_IMODE - [(SI "TARGET_SSE4_1") (DI "TARGET_AVX512VL")]) (define_code_attr maxmin_rel [(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")]) (define_expand "3" [(parallel - [(set (match_operand:MAXMIN_IMODE 0 "register_operand") - (maxmin:MAXMIN_IMODE - (match_operand:MAXMIN_IMODE 1 "register_operand") - (match_operand:MAXMIN_IMODE 2 "nonimmediate_operand"))) + [(set (match_operand:SWI248 0 "register_operand") + (maxmin:SWI248 + (match_operand:SWI248 1 "register_operand") + (match_operand:SWI248 2 "general_operand"))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_STV") + "TARGET_CMOVE") (define_insn_and_split "*3_1" - [(set (match_operand:MAXMIN_IMODE 0 "register_operand") - (maxmin:MAXMIN_IMODE - (match_operand:MAXMIN_IMODE 1 "register_operand") - (match_operand:MAXMIN_IMODE 2 "nonimmediate_operand"))) + [(set (match_operand:SWI248 0 "register_operand") + (maxmin:SWI248 + (match_operand:SWI248 1 "register_operand") + (match_operand:SWI248 2 "general_operand"))) (clobber (reg:CC FLAGS_REG))] - "(TARGET_64BIT || mode != DImode) && TARGET_STV + "TARGET_CMOVE && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) - (if_then_else:MAXMIN_IMODE (match_dup 3) + 
(if_then_else:SWI248 (match_dup 3) (match_dup 1) (match_dup 2)))] { machine_mode mode = mode; + rtx cmp_op = operands[2]; - if (!register_operand (operands[2], mode)) - operands[2] = force_reg (mode, operands[2]); + if (!register_operand (cmp_op, mode)) + operands[2] = force_reg (mode, cmp_op); enum rtx_code code = ; - machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], operands[2]); + + if (cmp_op == const1_rtx) + { + /* Convert smax (x, 1) into (x > 0 ? x : 1). + Convert umax (x, 1) into (x != 0 ? x : 1). + Convert ?min (x, 1) into (x <= 0 ? x : 1). */ + cmp_op = const0_rtx; + if (code == GE) + code = GT; + else if (code == GEU) + code = NE; + } + /* Convert smin (x, -1) into (x < 0 ? x : -1). */ + else if (cmp_op == constm1_rtx && code == LE) + { + cmp_op = const0_rtx; + code = LT; + } + /* Convert smax (x, -1) into (x >= 0 ? x : -1). */ + else if (cmp_op == constm1_rtx && code == GE) + cmp_op = const0_rtx; + else if (cmp_op != const0_rtx) + cmp_op = operands[2]; + + machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], cmp_op); rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG); - rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], operands[2]); + rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], cmp_op); emit_insn (gen_rtx_SET (flags, tmp)); operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); @@ -18856,9 +18879,9 @@ (define_insn_and_split "*di3_doubleword" [(set (match_operand:DI 0 "register_operand") (maxmin:DI (match_operand:DI 1 "register_operand") - (match_operand:DI 2 "nonimmediate_operand"))) + (match_operand:DI 2 "general_operand"))) (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL + "!TARGET_64BIT && TARGET_CMOVE && ix86_pre_reload_split ()" "#" "&& 1" @@ -18910,6 +18933,19 @@ gcc_unreachable (); } }) + +;; Avoid clearing a register between a flags setting comparison and its use, +;; i.e. prefer "xorl %eax,%eax; test/cmp" over "test/cmp; movl $0, %eax". 
+(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0)) + (set (match_operand:SWI 1 "register_operand") (const_int 0))] + "peep2_regno_dead_p (0, FLAGS_REG) + && !reg_overlap_mentioned_p (operands[1], operands[0])" + [(set (match_dup 2) (match_dup 0))] +{ + operands[2] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + ix86_expand_clear (operands[1]); +}) ;; Misc patterns (?) diff --git a/gcc/testsuite/gcc.target/i386/minmax-10.c b/gcc/testsuite/gcc.target/i386/minmax-10.c new file mode 100644 index 0000000..b044462 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/minmax-10.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#define max(a,b) (((a) > (b))? (a) : (b)) +#define min(a,b) (((a) < (b))? (a) : (b)) + +int smax1(int x) +{ + return max(x,1); +} + +int smin1(int x) +{ + return min(x,1); +} + +int smaxm1(int x) +{ + return max(x,-1); +} + +int sminm1(int x) +{ + return min(x,-1); +} + +unsigned int umax1(unsigned int x) +{ + return max(x,1); +} + +unsigned int umin1(unsigned int x) +{ + return min(x,1); +} + +/* { dg-final { scan-assembler-times "test" 6 } } */ +/* { dg-final { scan-assembler-not "cmp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/minmax-11.c b/gcc/testsuite/gcc.target/i386/minmax-11.c new file mode 100644 index 0000000..a8c2df5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/minmax-11.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-tree-reassoc" } */ + +#define max(a,b) (((a) > (b))? (a) : (b)) + +int foo(int x) +{ + int y = max(x,12345); + return max(y,87654); +} + +/* { dg-final { scan-assembler-not "12345" } } */ diff --git a/gcc/testsuite/gcc.target/i386/minmax-8.c b/gcc/testsuite/gcc.target/i386/minmax-8.c new file mode 100644 index 0000000..1f7e466 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/minmax-8.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-Os" } */ + +#define max(a,b) (((a) > (b))? (a) : (b)) +#define min(a,b) (((a) < (b))? 
(a) : (b)) + +int foo(int x) +{ + return max(x,12345); +} + +int bar(int x) +{ + return min(x,87654); +} + +/* { dg-final { scan-assembler-times "12345" 1 } } */ +/* { dg-final { scan-assembler-times "87654" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/minmax-9.c b/gcc/testsuite/gcc.target/i386/minmax-9.c new file mode 100644 index 0000000..3b94023 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/minmax-9.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-Os" } */ + +#define max(a,b) (((a) > (b))? (a) : (b)) +#define min(a,b) (((a) < (b))? (a) : (b)) + +int foo(int x) +{ + return max(x,0); +} + +int bar(int x) +{ + return min(x,0); +} + +unsigned int baz(unsigned int x) +{ + return min(x,1); +} + +/* { dg-final { scan-assembler-times "xor" 3 } } */ +/* { dg-final { scan-assembler-times "test" 3 } } */ -- 2.7.4