From 558d9f795b5c7f560bef4539e19c4f2e9e99533b Mon Sep 17 00:00:00 2001 From: Alexander Ivchenko Date: Fri, 11 Oct 2013 14:00:11 +0000 Subject: [PATCH] i386.c (ix86_rtx_costs): Enable fma for TARGET_AVX512F. * config/i386/i386.c (ix86_rtx_costs): Enable fma for TARGET_AVX512F. * config/i386/sse.md (FMAMODEM): Changed modes and conditions. (FMAMODE): Ditto. (fma<mode>4): Removed condition. (fms<mode>4): Ditto. (fnma<mode>4): Ditto. (fnms<mode>4): Ditto. (fma4i_fmadd_<mode>): Ditto. (*fma_fmadd_<mode>): Ditto. (*fma_fmsub_<mode>): Ditto. (*fma_fnmadd_<mode>): Ditto. (*fma_fnmsub_<mode>): Ditto. (fmaddsub_<mode>): Allow for TARGET_AVX512F. (*fma_fmaddsub_<mode>): Ditto. (*fma_fmsubadd_<mode>): Ditto. (*fmai_fmadd_<mode>): Ditto. (*fmai_fmsub_<mode>): Ditto. (*fmai_fnmadd_<mode>): Ditto. (*fmai_fnmsub_<mode>): Ditto. Co-Authored-By: Andrey Turetskiy Co-Authored-By: Anna Tikhonova Co-Authored-By: Ilya Tocar Co-Authored-By: Ilya Verbin Co-Authored-By: Kirill Yukhin Co-Authored-By: Maxim Kuznetsov Co-Authored-By: Michael Zolotukhin Co-Authored-By: Sergey Lega From-SVN: r203439 --- gcc/ChangeLog | 30 +++++++++++++++++++++++ gcc/config/i386/i386.c | 2 +- gcc/config/i386/sse.md | 64 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 73 insertions(+), 23 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7a694c0..0fb3d0b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -8,6 +8,36 @@ Kirill Yukhin Michael Zolotukhin + * config/i386/i386.c (ix86_rtx_costs): Enable fma for TARGET_AVX512F. + * config/i386/sse.md (FMAMODEM): Changed modes and conditions. + (FMAMODE): Ditto. + (fma<mode>4): Removed condition. + (fms<mode>4): Ditto. + (fnma<mode>4): Ditto. + (fnms<mode>4): Ditto. + (fma4i_fmadd_<mode>): Ditto. + (*fma_fmadd_<mode>): Ditto. + (*fma_fmsub_<mode>): Ditto. + (*fma_fnmadd_<mode>): Ditto. + (*fma_fnmsub_<mode>): Ditto. + (fmaddsub_<mode>): Allow for TARGET_AVX512F. + (*fma_fmaddsub_<mode>): Ditto. + (*fma_fmsubadd_<mode>): Ditto. + (*fmai_fmadd_<mode>): Ditto. + (*fmai_fmsub_<mode>): Ditto. + (*fmai_fnmadd_<mode>): Ditto. + (*fmai_fnmsub_<mode>): Ditto.
+ +2013-10-11 Alexander Ivchenko + Maxim Kuznetsov + Sergey Lega + Anna Tikhonova + Ilya Tocar + Andrey Turetskiy + Ilya Verbin + Kirill Yukhin + Michael Zolotukhin + * config/i386/sse.md (VI248_AVX2_8_AVX512F): New. (VI124_256): Changed to ... (VI124_256_48_512): This. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 168a2ac..37c1bec 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -34811,7 +34811,7 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total, rtx sub; gcc_assert (FLOAT_MODE_P (mode)); - gcc_assert (TARGET_FMA || TARGET_FMA4); + gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); /* ??? SSE scalar/vector cost should be used here. */ /* ??? Bald assumption that fma has the same cost as fmul. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a380690..0b81521 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2254,9 +2254,22 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; The standard names for scalar FMA are only available with SSE math enabled. -(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH") - (DF "TARGET_SSE_MATH") - V4SF V2DF V8SF V4DF]) +;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't +;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA +;; and TARGET_FMA4 are both false. +;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA +;; one must force the EVEX encoding of the fma insns. Ideally we'd improve +;; GAS to allow proper prefix selection. However, for the moment all hardware +;; that supports AVX512F also supports FMA so we can ignore this for now. 
+(define_mode_iterator FMAMODEM + [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)") + (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)") + (V4SF "TARGET_FMA || TARGET_FMA4") + (V2DF "TARGET_FMA || TARGET_FMA4") + (V8SF "TARGET_FMA || TARGET_FMA4") + (V4DF "TARGET_FMA || TARGET_FMA4") + (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) (define_expand "fma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -2264,7 +2277,7 @@ (match_operand:FMAMODEM 1 "nonimmediate_operand") (match_operand:FMAMODEM 2 "nonimmediate_operand") (match_operand:FMAMODEM 3 "nonimmediate_operand")))] - "TARGET_FMA || TARGET_FMA4") + "") (define_expand "fms<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -2272,7 +2285,7 @@ (match_operand:FMAMODEM 1 "nonimmediate_operand") (match_operand:FMAMODEM 2 "nonimmediate_operand") (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))] - "TARGET_FMA || TARGET_FMA4") + "") (define_expand "fnma<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -2280,7 +2293,7 @@ (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) (match_operand:FMAMODEM 2 "nonimmediate_operand") (match_operand:FMAMODEM 3 "nonimmediate_operand")))] - "TARGET_FMA || TARGET_FMA4") + "") (define_expand "fnms<mode>4" [(set (match_operand:FMAMODEM 0 "register_operand") @@ -2288,10 +2301,17 @@ (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) (match_operand:FMAMODEM 2 "nonimmediate_operand") (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))] - "TARGET_FMA || TARGET_FMA4") + "") ;; The builtins for intrinsics are not constrained by SSE math enabled.
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) +(define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") + (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") + (V4SF "TARGET_FMA || TARGET_FMA4") + (V2DF "TARGET_FMA || TARGET_FMA4") + (V8SF "TARGET_FMA || TARGET_FMA4") + (V4DF "TARGET_FMA || TARGET_FMA4") + (V16SF "TARGET_AVX512F") + (V8DF "TARGET_AVX512F")]) (define_expand "fma4i_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand") @@ -2299,7 +2319,7 @@ (match_operand:FMAMODE 1 "nonimmediate_operand") (match_operand:FMAMODE 2 "nonimmediate_operand") (match_operand:FMAMODE 3 "nonimmediate_operand")))] - "TARGET_FMA || TARGET_FMA4") + "") (define_insn "*fma_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") @@ -2307,7 +2327,7 @@ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] - "TARGET_FMA || TARGET_FMA4" + "" "@ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2318,14 +2338,14 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma_fmsub_<mode>" +(define_insn "fma_fmsub_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x") (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] - "TARGET_FMA || TARGET_FMA4" + "" "@ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2336,14 +2356,14 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma_fnmadd_<mode>" +(define_insn "fma_fnmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x") (fma:FMAMODE (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")) (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
(match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))] - "TARGET_FMA || TARGET_FMA4" + "" "@ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2362,7 +2382,7 @@ (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m") (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))] - "TARGET_FMA || TARGET_FMA4" + "" "@ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2391,7 +2411,7 @@ (match_operand:VF 2 "nonimmediate_operand") (match_operand:VF 3 "nonimmediate_operand")] UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4") + "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F") (define_insn "*fma_fmaddsub_<mode>" [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x") @@ -2400,7 +2420,7 @@ (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m") (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")] UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4" + "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)" "@ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2419,7 +2439,7 @@ (neg:VF (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))] UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4" + "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)" "@ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} @@ -2453,7 +2473,7 @@ (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) (match_dup 1) (const_int 1)))] - "TARGET_FMA" + "TARGET_FMA || TARGET_AVX512F" "@ vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" @@ -2470,7 +2490,7 @@ (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) (match_dup 1) (const_int 1)))] - "TARGET_FMA" + "TARGET_FMA || TARGET_AVX512F" "@ vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" @@ -2487,7 +2507,7 @@ (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")) (match_dup 1) (const_int 1)))] - "TARGET_FMA" + "TARGET_FMA || TARGET_AVX512F" "@ vfnmadd132<ssescalarmodesuffix>\t{%2, %3,
%0|%0, %3, %2} vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" @@ -2505,7 +2525,7 @@ (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))) (match_dup 1) (const_int 1)))] - "TARGET_FMA" + "TARGET_FMA || TARGET_AVX512F" "@ vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" -- 2.7.4