Simplify FMA4 patterns with FMA rtx code.
author: rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 19 Oct 2010 02:12:00 +0000 (02:12 +0000)
committer: rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 19 Oct 2010 02:12:00 +0000 (02:12 +0000)
Also fix incorrect rtl generation for scalar instructions.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@165676 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/fma4intrin.h
gcc/config/i386/i386.c
gcc/config/i386/i386.md
gcc/config/i386/sse.md

index f9eea06..8f1595d 100644 (file)
@@ -1,3 +1,46 @@
+2010-10-18  Richard Henderson  <rth@redhat.com>
+
+       * config/i386/i386.c (IX86_BUILTIN_VFMSUBSS, IX86_BUILTIN_VFMSUBSD,
+       IX86_BUILTIN_VFMSUBPS, IX86_BUILTIN_VFMSUBPD,
+       IX86_BUILTIN_VFMSUBADDPS, IX86_BUILTIN_VFMSUBADDPD,
+       IX86_BUILTIN_VFNMADDSS, IX86_BUILTIN_VFNMADDSD,
+       IX86_BUILTIN_VFNMADDPS, IX86_BUILTIN_VFNMADDPD,
+       IX86_BUILTIN_VFNMSUBSS, IX86_BUILTIN_VFNMSUBSD,
+       IX86_BUILTIN_VFNMSUBPS, IX86_BUILTIN_VFNMSUBPD,
+       IX86_BUILTIN_VFMSUBADDPS256, IX86_BUILTIN_VFMSUBADDPD256,
+       IX86_BUILTIN_VFNMADDPS256, IX86_BUILTIN_VFNMADDPD256,
+       IX86_BUILTIN_VFNMSUBPS256, IX86_BUILTIN_VFNMSUBPD256): Remove.
+       (bdesc_multi_arg): Remove the corresponding builtins.
+       * config/i386/i386.md (UNSPEC_FMA4_INTRINSIC): Remove.
+       (UNSPEC_FMA4_FMSUBADD): Remove.
+       (UNSPEC_FMADDSUB): Rename from UNSPEC_FMA4_FMADDSUB.
+       * config/i386/sse.md (FMA4MODEF4): Remove.
+       (FMAMODE): Add.
+       (fma<mode>4): New expander.
+       (*fma4i_fmadd_<mode>): Macroize from fma4i_fmadd<mode>4 patterns,
+       and use FMA rtx code instead of UNSPEC_FMA4_INTRINSIC.
+       (*fma4i_fmsub_<mode>): Similarly.
+       (*fma4i_fnmadd_<mode>): Similarly.
+       (*fma4i_fnmsub_<mode>): Similarly.
+       (fma4i_vmfmadd_<mode>): Scalar patterns zero-extend, not merge
+       with the first operand.
+       (fma4i_fmaddsub_<mode>): Represent with UNSPEC_FMADDSUB instead
+       of explicit arithmetic.  Macroize with AVXMODEF2P.
+       (*fma4i_fmsubadd_<mode>): Represent with UNSPEC_FMADDSUB + NEG.
+       (xop_frcz<mode>2): Macroize with FMAMODE.
+       (xop_vmfrcz<mode>2): Scalar patterns zero-extend, not merge with
+       the first operand.
+       * config/i386/fma4intrin.h (_mm_msub_ps): Use vfmadd intrinsic with
+       extra negations.
+       (_mm_msub_pd, _mm_msub_ss, _mm_msub_sd): Likewise.
+       (_mm_nmacc_ps, _mm_nmacc_pd, _mm_nmacc_ss, _mm_nmacc_sd): Likewise.
+       (_mm_nmsub_ps, _mm_nmsub_pd, _mm_nmsub_ss, _mm_nmsub_sd): Likewise.
+       (_mm256_msub_ps, _mm256_msub_pd): Likewise.
+       (_mm256_nmacc_ps, _mm256_nmacc_pd): Likewise.
+       (_mm256_nmsub_ps, _mm256_nmsub_pd): Likewise.
+       (_mm_msubadd_ps): Use vfmaddsub intrinsic with extra negation.
+       (_mm_msubadd_pd, _mm256_msubadd_ps, _mm256_msubadd_pd): Likewise.
+
 2010-10-18  Bernd Schmidt  <bernds@codesourcery.com>
 
        PR rtl-optimization/45966
index 2bd411a..b910cd1 100644 (file)
@@ -64,73 +64,73 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
 
 {
-  return (__m128) __builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfnmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfnmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfnmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfnmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -148,13 +148,13 @@ _mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfmsubaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmsubaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 /* 256b Floating point multiply/add type instructions.  */
@@ -174,37 +174,37 @@ extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
 
 {
-  return (__m256) __builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -222,13 +222,13 @@ _mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfmsubaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfmsubaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 #endif
index eada313..7da2cfb 100644 (file)
@@ -22769,34 +22769,12 @@ enum ix86_builtins
   IX86_BUILTIN_VFMADDSD,
   IX86_BUILTIN_VFMADDPS,
   IX86_BUILTIN_VFMADDPD,
-  IX86_BUILTIN_VFMSUBSS,
-  IX86_BUILTIN_VFMSUBSD,
-  IX86_BUILTIN_VFMSUBPS,
-  IX86_BUILTIN_VFMSUBPD,
-  IX86_BUILTIN_VFMADDSUBPS,
-  IX86_BUILTIN_VFMADDSUBPD,
-  IX86_BUILTIN_VFMSUBADDPS,
-  IX86_BUILTIN_VFMSUBADDPD,
-  IX86_BUILTIN_VFNMADDSS,
-  IX86_BUILTIN_VFNMADDSD,
-  IX86_BUILTIN_VFNMADDPS,
-  IX86_BUILTIN_VFNMADDPD,
-  IX86_BUILTIN_VFNMSUBSS,
-  IX86_BUILTIN_VFNMSUBSD,
-  IX86_BUILTIN_VFNMSUBPS,
-  IX86_BUILTIN_VFNMSUBPD,
   IX86_BUILTIN_VFMADDPS256,
   IX86_BUILTIN_VFMADDPD256,
-  IX86_BUILTIN_VFMSUBPS256,
-  IX86_BUILTIN_VFMSUBPD256,
+  IX86_BUILTIN_VFMADDSUBPS,
+  IX86_BUILTIN_VFMADDSUBPD,
   IX86_BUILTIN_VFMADDSUBPS256,
   IX86_BUILTIN_VFMADDSUBPD256,
-  IX86_BUILTIN_VFMSUBADDPS256,
-  IX86_BUILTIN_VFMSUBADDPD256,
-  IX86_BUILTIN_VFNMADDPS256,
-  IX86_BUILTIN_VFNMADDPD256,
-  IX86_BUILTIN_VFNMSUBPS256,
-  IX86_BUILTIN_VFNMSUBPD256,
 
   IX86_BUILTIN_VPCMOV,
   IX86_BUILTIN_VPCMOV_V2DI,
@@ -23953,43 +23931,18 @@ static const struct builtin_description bdesc_args[] =
 
 static const struct builtin_description bdesc_multi_arg[] =
 {
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4,     "__builtin_ia32_vfmaddss",    IX86_BUILTIN_VFMADDSS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4,     "__builtin_ia32_vfmaddsd",    IX86_BUILTIN_VFMADDSD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4,       "__builtin_ia32_vfmaddps",    IX86_BUILTIN_VFMADDPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4,       "__builtin_ia32_vfmaddpd",    IX86_BUILTIN_VFMADDPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4,     "__builtin_ia32_vfmsubss",    IX86_BUILTIN_VFMSUBSS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4,     "__builtin_ia32_vfmsubsd",    IX86_BUILTIN_VFMSUBSD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4,       "__builtin_ia32_vfmsubps",    IX86_BUILTIN_VFMSUBPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4,       "__builtin_ia32_vfmsubpd",    IX86_BUILTIN_VFMSUBPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4,    "__builtin_ia32_vfnmaddss",   IX86_BUILTIN_VFNMADDSS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4,    "__builtin_ia32_vfnmaddsd",   IX86_BUILTIN_VFNMADDSD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4,      "__builtin_ia32_vfnmaddps",   IX86_BUILTIN_VFNMADDPS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4,      "__builtin_ia32_vfnmaddpd",   IX86_BUILTIN_VFNMADDPD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4,    "__builtin_ia32_vfnmsubss",   IX86_BUILTIN_VFNMSUBSS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4,    "__builtin_ia32_vfnmsubsd",   IX86_BUILTIN_VFNMSUBSD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4,      "__builtin_ia32_vfnmsubps",   IX86_BUILTIN_VFNMSUBPS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4,      "__builtin_ia32_vfnmsubpd",   IX86_BUILTIN_VFNMSUBPD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4,           "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4,           "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4,           "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4,           "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256,       "__builtin_ia32_vfmaddps256",    IX86_BUILTIN_VFMADDPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256,       "__builtin_ia32_vfmaddpd256",    IX86_BUILTIN_VFMADDPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256,       "__builtin_ia32_vfmsubps256",    IX86_BUILTIN_VFMSUBPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256,       "__builtin_ia32_vfmsubpd256",    IX86_BUILTIN_VFMSUBPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256,      "__builtin_ia32_vfnmaddps256",   IX86_BUILTIN_VFNMADDPS256,   UNKNOWN,      (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256,      "__builtin_ia32_vfnmaddpd256",   IX86_BUILTIN_VFNMADDPD256,   UNKNOWN,      (int)MULTI_ARG_3_DF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256,      "__builtin_ia32_vfnmsubps256",   IX86_BUILTIN_VFNMSUBPS256,   UNKNOWN,      (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256,      "__builtin_ia32_vfnmsubpd256",   IX86_BUILTIN_VFNMSUBPD256,   UNKNOWN,      (int)MULTI_ARG_3_DF2 },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4,           "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4,           "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4,           "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4,           "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,     "__builtin_ia32_vfmaddss",    IX86_BUILTIN_VFMADDSS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,     "__builtin_ia32_vfmaddsd",    IX86_BUILTIN_VFMADDSD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
+
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav4sf4,               "__builtin_ia32_vfmaddps",    IX86_BUILTIN_VFMADDPS,    UNKNOWN,   (int)MULTI_ARG_3_SF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav2df4,               "__builtin_ia32_vfmaddpd",    IX86_BUILTIN_VFMADDPD,    UNKNOWN,   (int)MULTI_ARG_3_DF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav8sf4,               "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN,   (int)MULTI_ARG_3_SF2 },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav4df4,               "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN,   (int)MULTI_ARG_3_DF2 },
+
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v4sf,           "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,        UNKNOWN,  (int)MULTI_ARG_3_SF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v2df,           "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,        UNKNOWN,  (int)MULTI_ARG_3_DF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v8sf,           "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,  UNKNOWN,  (int)MULTI_ARG_3_SF2 },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v4df,           "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,  UNKNOWN,  (int)MULTI_ARG_3_DF2 },
 
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov",      IX86_BUILTIN_VPCMOV,     UNKNOWN,      (int)MULTI_ARG_3_DI },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN,      (int)MULTI_ARG_3_DI },
@@ -24043,8 +23996,8 @@ static const struct builtin_description bdesc_multi_arg[] =
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2,       "__builtin_ia32_vfrczsd",     IX86_BUILTIN_VFRCZSD,     UNKNOWN,      (int)MULTI_ARG_2_DF },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2,         "__builtin_ia32_vfrczps",     IX86_BUILTIN_VFRCZPS,     UNKNOWN,      (int)MULTI_ARG_1_SF },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2,         "__builtin_ia32_vfrczpd",     IX86_BUILTIN_VFRCZPD,     UNKNOWN,      (int)MULTI_ARG_1_DF },
-  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256,         "__builtin_ia32_vfrczps256",  IX86_BUILTIN_VFRCZPS256,  UNKNOWN,      (int)MULTI_ARG_1_SF2 },
-  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256,         "__builtin_ia32_vfrczpd256",  IX86_BUILTIN_VFRCZPD256,  UNKNOWN,      (int)MULTI_ARG_1_DF2 },
+  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2,         "__builtin_ia32_vfrczps256",  IX86_BUILTIN_VFRCZPS256,  UNKNOWN,      (int)MULTI_ARG_1_SF2 },
+  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2,         "__builtin_ia32_vfrczpd256",  IX86_BUILTIN_VFRCZPD256,  UNKNOWN,      (int)MULTI_ARG_1_DF2 },
 
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw,           "__builtin_ia32_vphaddbw",    IX86_BUILTIN_VPHADDBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd,           "__builtin_ia32_vphaddbd",    IX86_BUILTIN_VPHADDBD,    UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
index 0d3856e..ae52746 100644 (file)
   UNSPEC_PCMPISTR
 
   ;; For FMA4 support
-  UNSPEC_FMA4_INTRINSIC
-  UNSPEC_FMA4_FMADDSUB
-  UNSPEC_FMA4_FMSUBADD
+  UNSPEC_FMADDSUB
   UNSPEC_XOP_UNSIGNED_CMP
   UNSPEC_XOP_TRUEFALSE
   UNSPEC_XOP_PERMUTE
index 2be9903..d6e1f12 100644 (file)
@@ -55,7 +55,6 @@
 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
-(define_mode_iterator FMA4MODEF4 [V8SF V4DF])
 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
 
 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
@@ -70,6 +69,8 @@
 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
 
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
+
 ;; Int-float size matches
 (define_mode_iterator SSEMODE4S [V4SF V4SI])
 (define_mode_iterator SSEMODE2D [V2DF V2DI])
 ;;     (set (reg2) (mult (reg1) (mem (addr2))))
 ;;     (set (reg3) (plus (reg2) (mem (addr3))))
 
-(define_insn "fma4_fmadd<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-       (plus:FMA4MODEF4
-        (mult:FMA4MODEF4
-         (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-         (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
-        (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+;; Intrinsic FMA operations.
+
+(define_expand "fma<mode>4"
+  [(set (match_operand:FMAMODE 0 "register_operand")
+       (fma:FMAMODE
+         (match_operand:FMAMODE 1 "nonimmediate_operand")
+         (match_operand:FMAMODE 2 "nonimmediate_operand")
+         (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+  "TARGET_FMA4"
+  "")
+
+(define_insn "*fma4i_fmadd_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+       (fma:FMAMODE
+         (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+         (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+         (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
+  "TARGET_FMA4"
   "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating multiply and subtract.
-(define_insn "fma4_fmsub<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-       (minus:FMA4MODEF4
-        (mult:FMA4MODEF4
-         (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-         (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
-        (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+(define_insn "*fma4i_fmsub_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+       (fma:FMAMODE
+         (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+         (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+         (neg:FMAMODE
+           (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
+  "TARGET_FMA4"
   "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating point negative multiply and add.
-;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
-(define_insn "fma4_fnmadd<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-       (minus:FMA4MODEF4
-        (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
-        (mult:FMA4MODEF4
-         (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-         (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+(define_insn "*fma4i_fnmadd_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+       (fma:FMAMODE
+         (neg:FMAMODE
+           (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
+         (match_operand:FMAMODE   2 "nonimmediate_operand" " x,m")
+         (match_operand:FMAMODE   3 "nonimmediate_operand" "xm,x")))]
+  "TARGET_FMA4"
   "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating point negative multiply and subtract.
-(define_insn "fma4_fnmsub<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-       (minus:FMA4MODEF4
-        (mult:FMA4MODEF4
-         (neg:FMA4MODEF4
-          (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
-         (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
-        (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+(define_insn "*fma4i_fnmsub_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+       (fma:FMAMODE
+         (neg:FMAMODE
+           (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
+         (match_operand:FMAMODE   2 "nonimmediate_operand" " x,m")
+         (neg:FMAMODE
+           (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
+  "TARGET_FMA4"
   "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fma4_fmadd<mode>4"
-  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
-       (plus:SSEMODEF4
-        (mult:SSEMODEF4
-         (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
-         (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
-        (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
+;; Scalar versions of the above.  Unlike ADDSS et al, these write the
+;; entire destination register, with the high-order elements zeroed.
+
+(define_expand "fma4i_vmfmadd_<mode>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand")
+       (vec_merge:SSEMODEF2P
+         (fma:SSEMODEF2P
+           (match_operand:SSEMODEF2P 1 "nonimmediate_operand")
+           (match_operand:SSEMODEF2P 2 "nonimmediate_operand")
+           (match_operand:SSEMODEF2P 3 "nonimmediate_operand"))
+         (match_dup 4)
+         (const_int 1)))]
+  "TARGET_FMA4"
+{
+  operands[4] = CONST0_RTX (<MODE>mode);
+})
 
-;; For the scalar operations, use operand1 for the upper words that aren't
-;; modified, so restrict the forms that are generated.
-;; Scalar version of fmadd.
-(define_insn "fma4_vmfmadd<mode>4"
+(define_insn "*fma4i_vmfmadd_<mode>"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (vec_merge:SSEMODEF2P
-        (plus:SSEMODEF2P
-         (mult:SSEMODEF2P
-          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-         (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
-        (match_dup 0)
-        (const_int 1)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+         (fma:SSEMODEF2P
+           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
+           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
+           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+         (match_operand:SSEMODEF2P 4 "const0_operand" "")
+         (const_int 1)))]
+  "TARGET_FMA4"
   "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating multiply and subtract.
-;; Allow two memory operands the same as fmadd.
-(define_insn "fma4_fmsub<mode>4"
-  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
-       (minus:SSEMODEF4
-        (mult:SSEMODEF4
-         (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
-         (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
-        (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-
-;; For the scalar operations, use operand1 for the upper words that aren't
-;; modified, so restrict the forms that are generated.
-;; Scalar version of fmsub.
-(define_insn "fma4_vmfmsub<mode>4"
+(define_insn "*fma4i_vmfmsub_<mode>"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (vec_merge:SSEMODEF2P
-        (minus:SSEMODEF2P
-         (mult:SSEMODEF2P
-          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-         (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
-        (match_dup 0)
-        (const_int 1)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+         (fma:SSEMODEF2P
+           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
+           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m")
+           (neg:SSEMODEF2P
+             (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")))
+         (match_operand:SSEMODEF2P 4 "const0_operand" "")
+         (const_int 1)))]
+  "TARGET_FMA4"
   "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating point negative multiply and add.
-;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
-(define_insn "fma4_fnmadd<mode>4"
-  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
-       (minus:SSEMODEF4
-        (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
-        (mult:SSEMODEF4
-         (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
-         (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-
-;; For the scalar operations, use operand1 for the upper words that aren't
-;; modified, so restrict the forms that are generated.
-;; Scalar version of fnmadd.
-(define_insn "fma4_vmfnmadd<mode>4"
+(define_insn "*fma4i_vmfnmadd_<mode>"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (vec_merge:SSEMODEF2P
-        (minus:SSEMODEF2P
-         (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
-         (mult:SSEMODEF2P
-          (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
-        (match_dup 0)
-        (const_int 1)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+         (fma:SSEMODEF2P
+           (neg:SSEMODEF2P
+             (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
+           (match_operand:SSEMODEF2P   2 "nonimmediate_operand" " x,m")
+           (match_operand:SSEMODEF2P   3 "nonimmediate_operand" "xm,x"))
+         (match_operand:SSEMODEF2P 4 "const0_operand" "")
+         (const_int 1)))]
+  "TARGET_FMA4"
   "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating point negative multiply and subtract.
-;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
-(define_insn "fma4_fnmsub<mode>4"
-  [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
-       (minus:SSEMODEF4
-        (mult:SSEMODEF4
-         (neg:SSEMODEF4
-          (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
-         (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
-        (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-
-;; For the scalar operations, use operand1 for the upper words that aren't
-;; modified, so restrict the forms that are generated.
-;; Scalar version of fnmsub.
-(define_insn "fma4_vmfnmsub<mode>4"
+;; Scalar intrinsic fnmsub: fma (-op1) op2 (-op3) in the low element, with
+;; the upper elements of the result zeroed (operand 4 is const0_operand).
+(define_insn "*fma4i_vmfnmsub_<mode>"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
        (vec_merge:SSEMODEF2P
-        (minus:SSEMODEF2P
-         (mult:SSEMODEF2P
-          (neg:SSEMODEF2P
-           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
-          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-         (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
-        (match_dup 0)
-        (const_int 1)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "fma4i_fmadd<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-       (unspec:FMA4MODEF4
-        [(plus:FMA4MODEF4
-          (mult:FMA4MODEF4
-           (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-           (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
-          (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "fma4i_fmsub<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-       (unspec:FMA4MODEF4
-        [(minus:FMA4MODEF4
-          (mult:FMA4MODEF4
-           (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-           (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
-          (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "fma4i_fnmadd<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-       (unspec:FMA4MODEF4
-        [(minus:FMA4MODEF4
-          (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
-          (mult:FMA4MODEF4
-           (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-           (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
-        UNSPEC_FMA4_INTRINSIC))]
+         (fma:SSEMODEF2P
+           (neg:SSEMODEF2P
+             (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
+           (match_operand:SSEMODEF2P   2 "nonimmediate_operand" " x,m")
+           (neg:SSEMODEF2P
+             (match_operand:SSEMODEF2P   3 "nonimmediate_operand" "xm,x")))
+         (match_operand:SSEMODEF2P 4 "const0_operand" "")
+         (const_int 1)))]
   "TARGET_FMA4"
-  "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fma4i_fnmsub<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-       (unspec:FMA4MODEF4
-        [(minus:FMA4MODEF4
-          (mult:FMA4MODEF4
-           (neg:FMA4MODEF4
-            (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
-           (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
-          (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
+;; Non-intrinsic versions, matched when fused-multiply-add is allowed.
 
-(define_insn "fma4i_fmadd<mode>4"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
-       (unspec:SSEMODEF2P
-        [(plus:SSEMODEF2P
-          (mult:SSEMODEF2P
-           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-          (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
+;; Floating point multiply and add: op1 * op2 + op3.
+(define_insn "*fma4_fmadd_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+       (plus:FMAMODE
+        (mult:FMAMODE
+         (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+         (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m"))
+        (match_operand:FMAMODE 3 "nonimmediate_operand"  "xm,x")))]
+  "TARGET_FMA4 && TARGET_FUSED_MADD"
   "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fma4i_fmsub<mode>4"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
-       (unspec:SSEMODEF2P
-        [(minus:SSEMODEF2P
-          (mult:SSEMODEF2P
-           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-          (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
+;; Floating multiply and subtract: op1 * op2 - op3.
+(define_insn "*fma4_fmsub_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+       (minus:FMAMODE
+        (mult:FMAMODE
+         (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+         (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m"))
+        (match_operand:FMAMODE 3 "nonimmediate_operand"  "xm,x")))]
+  "TARGET_FMA4 && TARGET_FUSED_MADD"
   "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fma4i_fnmadd<mode>4"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
-       (unspec:SSEMODEF2P
-        [(minus:SSEMODEF2P
-          (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
-          (mult:SSEMODEF2P
-           (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
+;; Floating point negative multiply and add.
+;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
+;; Note the addend (operand 3) therefore appears first in the minus.
+(define_insn "*fma4_fnmadd_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+       (minus:FMAMODE
+        (match_operand:FMAMODE 3 "nonimmediate_operand"  "xm,x")
+        (mult:FMAMODE
+         (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+         (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m"))))]
+  "TARGET_FMA4 && TARGET_FUSED_MADD"
   "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fma4i_fnmsub<mode>4"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
-       (unspec:SSEMODEF2P
-        [(minus:SSEMODEF2P
-          (mult:SSEMODEF2P
-           (neg:SSEMODEF2P
-            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
-           (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-          (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
+;; Floating point negative multiply and subtract.
+;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
+(define_insn "*fma4_fnmsub_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+       (minus:FMAMODE
+        (mult:FMAMODE
+         (neg:FMAMODE
+          (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
+         (match_operand:FMAMODE 2 "nonimmediate_operand"  " x,m"))
+        (match_operand:FMAMODE 3 "nonimmediate_operand"   "xm,x")))]
+  "TARGET_FMA4 && TARGET_FUSED_MADD"
   "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; For the scalar operations, use operand1 for the upper words that aren't
-;; modified, so restrict the forms that are accepted.
-(define_insn "fma4i_vmfmadd<mode>4"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
-       (unspec:SSEMODEF2P
-        [(vec_merge:SSEMODEF2P
-          (plus:SSEMODEF2P
-           (mult:SSEMODEF2P
-            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
-          (match_dup 0)
-          (const_int 1))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<ssescalarmode>")])
-
-(define_insn "fma4i_vmfmsub<mode>4"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
-       (unspec:SSEMODEF2P
-        [(vec_merge:SSEMODEF2P
-          (minus:SSEMODEF2P
-           (mult:SSEMODEF2P
-            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
-          (match_dup 0)
-          (const_int 1))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<ssescalarmode>")])
-
-(define_insn "fma4i_vmfnmadd<mode>4"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
-       (unspec:SSEMODEF2P
-        [(vec_merge:SSEMODEF2P
-          (minus:SSEMODEF2P
-           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
-           (mult:SSEMODEF2P
-            (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
-            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
-          (match_dup 0)
-          (const_int 1))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<ssescalarmode>")])
-
-(define_insn "fma4i_vmfnmsub<mode>4"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
-       (unspec:SSEMODEF2P
-        [(vec_merge:SSEMODEF2P
-          (minus:SSEMODEF2P
-           (mult:SSEMODEF2P
-            (neg:SSEMODEF2P
-             (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
-            (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
-           (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
-          (match_dup 0)
-          (const_int 1))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<ssescalarmode>")])
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "fma4_fmaddsubv8sf4"
-  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
-       (vec_merge:V8SF
-         (plus:V8SF
-           (mult:V8SF
-             (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
-             (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
-           (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
-         (minus:V8SF
-           (mult:V8SF
-             (match_dup 1)
-             (match_dup 2))
-           (match_dup 3))
-         (const_int 170)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V8SF")])
-
-(define_insn "fma4_fmaddsubv4df4"
-  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
-       (vec_merge:V4DF
-         (plus:V4DF
-           (mult:V4DF
-             (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
-             (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
-           (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
-         (minus:V4DF
-           (mult:V4DF
-             (match_dup 1)
-             (match_dup 2))
-           (match_dup 3))
-         (const_int 10)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "fma4_fmaddsubv4sf4"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (vec_merge:V4SF
-         (plus:V4SF
-           (mult:V4SF
-             (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
-             (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
-           (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
-         (minus:V4SF
-           (mult:V4SF
-             (match_dup 1)
-             (match_dup 2))
-           (match_dup 3))
-         (const_int 10)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "fma4_fmaddsubv2df4"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (vec_merge:V2DF
-         (plus:V2DF
-           (mult:V2DF
-             (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
-             (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
-           (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
-         (minus:V2DF
-           (mult:V2DF
-             (match_dup 1)
-             (match_dup 2))
-           (match_dup 3))
-         (const_int 2)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "fma4_fmsubaddv8sf4"
-  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
-       (vec_merge:V8SF
-         (plus:V8SF
-           (mult:V8SF
-             (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
-             (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
-           (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
-         (minus:V8SF
-           (mult:V8SF
-             (match_dup 1)
-             (match_dup 2))
-           (match_dup 3))
-         (const_int 85)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V8SF")])
-
-(define_insn "fma4_fmsubaddv4df4"
-  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
-       (vec_merge:V4DF
-         (plus:V4DF
-           (mult:V4DF
-             (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
-             (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
-           (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
-         (minus:V4DF
-           (mult:V4DF
-             (match_dup 1)
-             (match_dup 2))
-           (match_dup 3))
-         (const_int 5)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "fma4_fmsubaddv4sf4"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (vec_merge:V4SF
-         (plus:V4SF
-           (mult:V4SF
-             (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
-             (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
-           (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
-         (minus:V4SF
-           (mult:V4SF
-             (match_dup 1)
-             (match_dup 2))
-           (match_dup 3))
-         (const_int 5)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "fma4_fmsubaddv2df4"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (vec_merge:V2DF
-         (plus:V2DF
-           (mult:V2DF
-             (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
-             (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
-           (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
-         (minus:V2DF
-           (mult:V2DF
-             (match_dup 1)
-             (match_dup 2))
-           (match_dup 3))
-         (const_int 1)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V2DF")])
+;; It would be possible to represent these without the UNSPEC as
+;;
+;; (vec_merge
+;;   (fma op1 op2 op3)
+;;   (fma op1 op2 (neg op3))
+;;   (merge-const))
+;;
+;; But this doesn't seem useful in practice.
 
-(define_insn "fma4i_fmaddsubv8sf4"
-  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
-       (unspec:V8SF
-        [(vec_merge:V8SF
-          (plus:V8SF
-            (mult:V8SF
-              (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V8SF
-            (mult:V8SF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 170))]
-        UNSPEC_FMA4_INTRINSIC))]
+;; Intrinsic fmaddsub, macroized over all of AVXMODEF2P.  The template
+;; suffix and mode attr must follow the iterator so that the DF and
+;; 128-bit modes emit vfmaddsubpd/vfmaddsubps with the proper mode,
+;; not the hard-coded V8SF form of the old per-mode pattern.
+(define_insn "fma4i_fmaddsub_<mode>"
+  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
+       (unspec:AVXMODEF2P
+         [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
+          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
+          (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
+         UNSPEC_FMADDSUB))]
   "TARGET_FMA4"
-  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V8SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "fma4i_fmaddsubv4df4"
-  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
-       (unspec:V4DF
-        [(vec_merge:V4DF
-          (plus:V4DF
-            (mult:V4DF
-              (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V4DF
-            (mult:V4DF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 10))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "fma4i_fmaddsubv4sf4"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (unspec:V4SF
-        [(vec_merge:V4SF
-          (plus:V4SF
-            (mult:V4SF
-              (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V4SF
-            (mult:V4SF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 10))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "fma4i_fmaddsubv2df4"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (unspec:V2DF
-        [(vec_merge:V2DF
-          (plus:V2DF
-            (mult:V2DF
-              (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V2DF
-            (mult:V2DF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 2))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "fma4i_fmsubaddv8sf4"
-  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
-       (unspec:V8SF
-        [(vec_merge:V8SF
-          (plus:V8SF
-            (mult:V8SF
-              (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V8SF
-            (mult:V8SF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 85))]
-        UNSPEC_FMA4_INTRINSIC))]
+;; fmsubadd is represented as fmaddsub of the negated addend, macroized
+;; over all of AVXMODEF2P.  As with fmaddsub above, the template suffix
+;; and mode attr must follow the iterator so DF and 128-bit modes emit
+;; vfmsubaddpd/vfmsubaddps with the proper mode, not a hard-coded V8SF.
+(define_insn "*fma4i_fmsubadd_<mode>"
+  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
+       (unspec:AVXMODEF2P
+         [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
+          (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
+          (neg:AVXMODEF2P
+            (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
+         UNSPEC_FMADDSUB))]
   "TARGET_FMA4"
-  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V8SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "fma4i_fmsubaddv4df4"
-  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
-       (unspec:V4DF
-        [(vec_merge:V4DF
-          (plus:V4DF
-            (mult:V4DF
-              (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V4DF
-            (mult:V4DF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 5))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "fma4i_fmsubaddv4sf4"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (unspec:V4SF
-        [(vec_merge:V4SF
-          (plus:V4SF
-            (mult:V4SF
-              (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V4SF
-            (mult:V4SF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 5))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "fma4i_fmsubaddv2df4"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (unspec:V2DF
-        [(vec_merge:V2DF
-          (plus:V2DF
-            (mult:V2DF
-              (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V2DF
-            (mult:V2DF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 1))]
-        UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V2DF")])
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point conversion operations
 })
 
 ;; XOP FRCZ support
-;; parallel insns
 (define_insn "xop_frcz<mode>2"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
-       (unspec:SSEMODEF2P
-        [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
+       (unspec:FMAMODE
+        [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
         UNSPEC_FRCZ))]
   "TARGET_XOP"
   "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
    (set_attr "mode" "<MODE>")])
 
 ;; scalar insns
-(define_insn "xop_vmfrcz<mode>2"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+;; The scalar frcz zeroes the upper elements of the result (operands[3]
+;; is the zero vector) rather than merging them from another input, so
+;; the expander no longer ties operand 1 to the destination.
+(define_expand "xop_vmfrcz<mode>2"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand")
        (vec_merge:SSEMODEF2P
          (unspec:SSEMODEF2P
-          [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+          [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
           UNSPEC_FRCZ)
-         (match_operand:SSEMODEF2P 1 "register_operand" "0")
+         (match_dup 3)
          (const_int 1)))]
   "TARGET_XOP"
-  "vfrcz<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt1")
-   (set_attr "mode" "<MODE>")])
+{
+  operands[3] = CONST0_RTX (<MODE>mode);
+})
 
-(define_insn "xop_frcz<mode>2256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
-       (unspec:FMA4MODEF4
-        [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
-        UNSPEC_FRCZ))]
+;; Insn matching the expansion above; operand 2 is the zero vector that
+;; supplies the untouched upper elements.
+(define_insn "*xop_vmfrcz_<mode>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+       (vec_merge:SSEMODEF2P
+         (unspec:SSEMODEF2P
+          [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
+          UNSPEC_FRCZ)
+         (match_operand:SSEMODEF2P 2 "const0_operand")
+         (const_int 1)))]
   "TARGET_XOP"
-  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
+  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt1")
    (set_attr "mode" "<MODE>")])