From bab64f23e9aa03c3f309c7de92004c544667a5d6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 11 Feb 2009 08:56:41 +0000 Subject: [PATCH] i386.md: Add two new peephole2 to avoid mov followed by arithmetic with memory operands. gcc: 2009-02-06 Paolo Bonzini * config/i386/i386.md: Add two new peephole2 to avoid mov followed by arithmetic with memory operands. * config/i386/predicates.md (commutative_operator): New. gcc/testsuite: 2009-02-06 Paolo Bonzini * gcc.target/i386/pr38824.c: New testcase. From-SVN: r144098 --- gcc/ChangeLog | 6 ++++++ gcc/config/i386/i386.md | 32 ++++++++++++++++++++++++++++++++ gcc/config/i386/predicates.md | 4 ++++ gcc/testsuite/ChangeLog | 4 ++++ gcc/testsuite/gcc.target/i386/pr38824.c | 21 +++++++++++++++++++++ 5 files changed, 67 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr38824.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1d32e09..4dd1672 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2009-02-11 Paolo Bonzini + + * config/i386/i386.md: Add two new peephole2 to avoid mov followed + by arithmetic with memory operands. + * config/i386/predicates.md (commutative_operator): New. + 2009-02-10 Janis Johnson * doc/extend.texi (Fixed-Point Types): Break long paragraphs into diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 89a3b17..7a4511f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20706,6 +20706,38 @@ (clobber (reg:CC FLAGS_REG))])] "") +;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when the memory address +;; refers to the destination of the load! + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (match_dup 0) + (match_operator:SI 3 "commutative_operator" + [(match_dup 0) + (match_operand:SI 2 "memory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] != operands[1]" + [(set (match_dup 0) (match_dup 4)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 1)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = simplify_replace_rtx (operands[2], operands[0], operands[1]);") + +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "register_operand" "")) + (set (match_dup 0) + (match_operator 3 "commutative_operator" + [(match_dup 0) + (match_operand 2 "memory_operand" "")]))] + "operands[0] != operands[1] + && (MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 1)]))] + "") + ; Don't do logical operations with memory outputs ; ; These two don't make sense for PPro/PII -- we're expanding a 4-uop diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index bdac210..a8c01ad 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1050,6 +1050,10 @@ (match_code "plus,mult,and,ior,xor,smin,smax,umin,umax,compare,minus,div, mod,udiv,umod,ashift,rotate,ashiftrt,lshiftrt,rotatert")) +;; Return true for COMMUTATIVE_P. +(define_predicate "commutative_operator" + (match_code "plus,mult,and,ior,xor,smin,smax,umin,umax")) + ;; Return 1 if OP is a binary operator that can be promoted to wider mode. (define_predicate "promotable_binary_operator" (ior (match_code "plus,and,ior,xor,ashift") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 52817fa..9b64184 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2009-02-11 Paolo Bonzini + + * gcc.target/i386/pr38824.c: New testcase. + 2009-02-11 Jason Merrill PR c++/38649 diff --git a/gcc/testsuite/gcc.target/i386/pr38824.c b/gcc/testsuite/gcc.target/i386/pr38824.c new file mode 100644 index 0000000..637abfd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr38824.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse" } */ + +typedef float v4sf __attribute__ ((__vector_size__ (16))); + +void bench_1(float * out, float * in, float f, unsigned int n) +{ + n /= 4; + v4sf scalar = { f, f, f, f }; + do + { + v4sf arg = *(v4sf *)in; + v4sf result = arg + scalar; + *(v4sf *) out = result; + in += 4; + out += 4; + } + while (--n); +} + +/* { dg-final { scan-assembler-not "addps\[^\\n\]*%\[er\]" } } */ -- 2.7.4