From aedff010f0b3fef486319a887b71bc625a4a2b78 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Mon, 7 Dec 2009 22:22:45 +0000 Subject: [PATCH] Remove XOP splitters. * config/i386/i386-protos.h (ix86_expand_fma4_multiple_memory): Removed. * config/i386/i386.c (ix86_expand_fma4_multiple_memory): Removed. * config/i386/sse.md: Remove all XOP splitters. Allow the second and fourth operands of XOP multiply-add insns to be nonimmediate. From-SVN: r155057 --- gcc/ChangeLog | 9 +++ gcc/config/i386/i386-protos.h | 2 - gcc/config/i386/i386.c | 30 -------- gcc/config/i386/sse.md | 163 +++++++----------------------------------- 4 files changed, 36 insertions(+), 168 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8c9ab25..a7694cb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,14 @@ 2009-12-07 Sebastian Pop + * config/i386/i386-protos.h (ix86_expand_fma4_multiple_memory): + Removed. + * config/i386/i386.c (ix86_expand_fma4_multiple_memory): Removed. + * config/i386/sse.md: Remove all XOP splitters. + Allow the second and fourth operands of XOP multiply-add insns + to be nonimmediate. + +2009-12-07 Sebastian Pop + * config/i386/sse.md: Remove all FMA4 splitters. Allow the second operand of FMA4 insns to be a nonimmediate. Fix comments punctuation. diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index cf29cc7..aa2ccd7 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -218,8 +218,6 @@ extern void ix86_expand_vector_set (bool, rtx, rtx, int); extern void ix86_expand_vector_extract (bool, rtx, rtx, int); extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx); -extern bool ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode); - extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned); /* In i386-c.c */ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6cd9d7d..7cafdf6 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -28808,36 +28808,6 @@ ix86_expand_round (rtx operand0, rtx operand1) } -/* Fixup an FMA4 or XOP instruction that has 2 memory input references - into a form the hardware will allow by using the destination - register to load one of the memory operations. Presently this is - used by the multiply/add routines to allow 2 memory references. */ - -bool -ix86_expand_fma4_multiple_memory (rtx operands[], - enum machine_mode mode) -{ - rtx scratch = operands[0]; - - gcc_assert (register_operand (operands[0], mode)); - gcc_assert (register_operand (operands[1], mode)); - gcc_assert (MEM_P (operands[2]) && MEM_P (operands[3])); - - if (reg_mentioned_p (scratch, operands[1])) - { - if (!can_create_pseudo_p ()) - return false; - scratch = gen_reg_rtx (mode); - } - - emit_move_insn (scratch, operands[3]); - if (rtx_equal_p (operands[2], operands[3])) - operands[2] = operands[3] = scratch; - else - operands[3] = scratch; - return true; -} - /* Table of valid machine attributes. */ static const struct attribute_spec ix86_attribute_table[] = { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 6500de3..24d6635 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10132,89 +10132,50 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; XOP parallel integer multiply/add instructions. -;; Note the instruction does not allow the value being added to be a memory -;; operation. However by pretending via the nonimmediate_operand predicate -;; that it does and splitting it later allows the following to be recognized: -;; a[i] = b[i] * c[i] + d[i]; +;; Note the XOP multiply/add instructions +;; a[i] = b[i] * c[i] + d[i]; +;; do not allow the value being added to be a memory operation. (define_insn "xop_pmacsww" [(set (match_operand:V8HI 0 "register_operand" "=x") (plus:V8HI (mult:V8HI - (match_operand:V8HI 1 "register_operand" "%x") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (match_operand:V8HI 2 "nonimmediate_operand" "xm")) - (match_operand:V8HI 3 "register_operand" "x")))] + (match_operand:V8HI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; Split pmacsww with two memory operands into a load and the pmacsww. -(define_split - [(set (match_operand:V8HI 0 "register_operand" "") - (plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "register_operand" "") - (match_operand:V8HI 2 "memory_operand" "")) - (match_operand:V8HI 3 "memory_operand" "")))] - "TARGET_XOP" - [(set (match_dup 0) - (plus:V8HI - (mult:V8HI (match_dup 1) (match_dup 2)) - (match_dup 3)))] -{ - if (!ix86_expand_fma4_multiple_memory (operands, V8HImode)) - FAIL; -}) - (define_insn "xop_pmacssww" [(set (match_operand:V8HI 0 "register_operand" "=x") (ss_plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "register_operand" "%x") + (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") (match_operand:V8HI 2 "nonimmediate_operand" "xm")) - (match_operand:V8HI 3 "register_operand" "x")))] + (match_operand:V8HI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; Note the instruction does not allow the value being added to be a memory -;; operation. However by pretending via the nonimmediate_operand predicate -;; that it does and splitting it later allows the following to be recognized: -;; a[i] = b[i] * c[i] + d[i]; (define_insn "xop_pmacsdd" [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (mult:V4SI - (match_operand:V4SI 1 "register_operand" "%x") + (match_operand:V4SI 1 "nonimmediate_operand" "%x") (match_operand:V4SI 2 "nonimmediate_operand" "xm")) - (match_operand:V4SI 3 "register_operand" "x")))] + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -;; Split pmacsdd with two memory operands into a load and the pmacsdd. -(define_split - [(set (match_operand:V4SI 0 "register_operand" "") - (plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "register_operand" "") - (match_operand:V4SI 2 "memory_operand" "")) - (match_operand:V4SI 3 "memory_operand" "")))] - "TARGET_XOP" - [(set (match_dup 0) - (plus:V4SI - (mult:V4SI (match_dup 1) (match_dup 2)) - (match_dup 3)))] -{ - if (!ix86_expand_fma4_multiple_memory (operands, V4SImode)) - FAIL; -}) - (define_insn "xop_pmacssdd" [(set (match_operand:V4SI 0 "register_operand" "=x") (ss_plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x") + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") (match_operand:V4SI 2 "nonimmediate_operand" "xm")) - (match_operand:V4SI 3 "register_operand" "x")))] + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") @@ -10226,14 +10187,14 @@ (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") + (match_operand:V4SI 1 "nonimmediate_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) (vec_select:V2SI (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3)]))) - (match_operand:V2DI 3 "register_operand" "x")))] + (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") @@ -10245,7 +10206,7 @@ (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") + (match_operand:V4SI 1 "nonimmediate_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI @@ -10253,7 +10214,7 @@ (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2)])))) - (match_operand:V2DI 3 "register_operand" "x")))] + (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") @@ -10265,7 +10226,7 @@ (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") + (match_operand:V4SI 1 "nonimmediate_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI @@ -10273,47 +10234,12 @@ (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3)])))) - (match_operand:V2DI 3 "register_operand" "x")))] + (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn_and_split "*xop_pmacsdql_mem" - [(set (match_operand:V2DI 0 "register_operand" "=&x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 1) - (const_int 3)])))) - (match_operand:V2DI 3 "memory_operand" "m")))] - "TARGET_XOP" - "#" - "&& reload_completed" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 1) - (const_int 3)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 1) - (const_int 3)])))) - (match_dup 0)))]) - ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so ;; fake it with a multiply/add. In general, we expect the define_split to ;; occur before register allocation, so we have to handle the corner case where @@ -10362,7 +10288,7 @@ (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") + (match_operand:V4SI 1 "nonimmediate_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI @@ -10370,47 +10296,12 @@ (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2)])))) - (match_operand:V2DI 3 "register_operand" "x")))] + (match_operand:V2DI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) -(define_insn_and_split "*xop_pmacsdqh_mem" - [(set (match_operand:V2DI 0 "register_operand" "=&x") - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 1 "register_operand" "%x") - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm") - (parallel [(const_int 0) - (const_int 2)])))) - (match_operand:V2DI 3 "memory_operand" "m")))] - "TARGET_XOP" - "#" - "&& reload_completed" - [(set (match_dup 0) - (match_dup 3)) - (set (match_dup 0) - (plus:V2DI - (mult:V2DI - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 1) - (parallel [(const_int 0) - (const_int 2)]))) - (sign_extend:V2DI - (vec_select:V2SI - (match_dup 2) - (parallel [(const_int 0) - (const_int 2)])))) - (match_dup 0)))]) - ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so ;; fake it with a multiply/add. In general, we expect the define_split to ;; occur before register allocation, so we have to handle the corner case where @@ -10460,7 +10351,7 @@ (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "register_operand" "%x") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (parallel [(const_int 1) (const_int 3) (const_int 5) @@ -10472,7 +10363,7 @@ (const_int 3) (const_int 5) (const_int 7)])))) - (match_operand:V4SI 3 "register_operand" "x")))] + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") @@ -10484,7 +10375,7 @@ (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "register_operand" "%x") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (parallel [(const_int 1) (const_int 3) (const_int 5) @@ -10496,7 +10387,7 @@ (const_int 3) (const_int 5) (const_int 7)])))) - (match_operand:V4SI 3 "register_operand" "x")))] + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") @@ -10509,7 +10400,7 @@ (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "register_operand" "%x") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -10536,7 +10427,7 @@ (const_int 3) (const_int 5) (const_int 7)]))))) - (match_operand:V4SI 3 "register_operand" "x")))] + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") @@ -10549,7 +10440,7 @@ (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "register_operand" "%x") + (match_operand:V8HI 1 "nonimmediate_operand" "%x") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -10576,7 +10467,7 @@ (const_int 3) (const_int 5) (const_int 7)]))))) - (match_operand:V4SI 3 "register_operand" "x")))] + (match_operand:V4SI 3 "nonimmediate_operand" "x")))] "TARGET_XOP" "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") -- 2.7.4