From a2f9d5b33291e9e61d0feaf98c8922d7d2ca87f1 Mon Sep 17 00:00:00 2001
From: rth
Date: Tue, 19 Oct 2010 02:12:00 +0000
Subject: [PATCH] Simplify FMA4 patterns with FMA rtx code.

Also fix incorrect rtl generation for scalar instructions.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@165676 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                |  43 +++
 gcc/config/i386/fma4intrin.h |  44 +--
 gcc/config/i386/i386.c       |  79 +----
 gcc/config/i386/i386.md      |   4 +-
 gcc/config/i386/sse.md       | 812 ++++++++++---------------------------------
 5 files changed, 259 insertions(+), 723 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f9eea06..8f1595d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,46 @@
+2010-10-18  Richard Henderson
+
+	* config/i386/i386.c (IX86_BUILTIN_VFMSUBSS, IX86_BUILTIN_VFMSUBSD,
+	IX86_BUILTIN_VFMSUBPS, IX86_BUILTIN_VFMSUBPD,
+	IX86_BUILTIN_VFMSUBADDPS, IX86_BUILTIN_VFMSUBADDPD,
+	IX86_BUILTIN_VFNMADDSS, IX86_BUILTIN_VFNMADDSD,
+	IX86_BUILTIN_VFNMADDPS, IX86_BUILTIN_VFNMADDPD,
+	IX86_BUILTIN_VFNMSUBSS, IX86_BUILTIN_VFNMSUBSD,
+	IX86_BUILTIN_VFNMSUBPS, IX86_BUILTIN_VFNMSUBPD,
+	IX86_BUILTIN_VFMSUBADDPS256, IX86_BUILTIN_VFMSUBADDPD256,
+	IX86_BUILTIN_VFNMADDPS256, IX86_BUILTIN_VFNMADDPD256,
+	IX86_BUILTIN_VFNMSUBPS256, IX86_BUILTIN_VFNMSUBPD256): Remove.
+	(bdesc_multi_arg): Remove the corresponding builtins.
+	* config/i386/i386.md (UNSPEC_FMA4_INTRINSIC): Remove.
+	(UNSPEC_FMA4_FMSUBADD): Remove.
+	(UNSPEC_FMADDSUB): Rename from UNSPEC_FMA4_FMADDSUB.
+	* config/i386/sse.md (FMA4MODEF4): Remove.
+	(FMAMODE): Add.
+	(fma<mode>4): New expander.
+	(*fma4i_fmadd_<mode>): Macroize from fma4i_fmadd<mode>4 patterns,
+	and use FMA rtx code instead of UNSPEC_FMA4_INTRINSIC.
+	(*fma4i_fmsub_<mode>): Similarly.
+	(*fma4i_fnmadd_<mode>): Similarly.
+	(*fma4i_fnmsub_<mode>): Similarly.
+	(fma4i_vmfmadd_<mode>): Scalar patterns zero-extend, not merge
+	with the first operand.
+	(fma4i_fmaddsub_<mode>): Represent with UNSPEC_FMADDSUB instead
+	of explicit arithmetic.  Macroize with AVXMODEF2P.
+	(*fma4i_fmsubadd_<mode>): Represent with UNSPEC_FMADDSUB + NEG.
+	(xop_frcz<mode>2): Macroize with FMAMODE.
+	(xop_vmfrcz<mode>2): Scalar patterns zero-extend, not merge with
+	the first operand.
+	* config/i386/fma4intrin.h (_mm_msub_ps): Use vfmadd intrinsic with
+	extra negations.
+	(_mm_msub_pd, _mm_msub_ss, _mm_msub_sd): Likewise.
+	(_mm_nmacc_ps, _mm_nmacc_pd, _mm_nmacc_ss, _mm_nmacc_sd): Likewise.
+	(_mm_nmsub_ps, _mm_nmsub_pd, _mm_nmsub_ss, _mm_nmsub_sd): Likewise.
+	(_mm256_msub_ps, _mm256_msub_pd): Likewise.
+	(_mm256_nmacc_ps, _mm256_nmacc_pd): Likewise.
+	(_mm256_nmsub_ps, _mm256_nmsub_pd): Likewise.
+	(_mm_msubadd_ps): Use vfmaddsub intrinsic with extra negation.
+	(_mm_msubadd_pd, _mm256_msubadd_ps, _mm256_msubadd_pd): Likewise.
+
 2010-10-18  Bernd Schmidt
 
 	PR rtl-optimization/45966
diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h
index 2bd411a..b910cd1 100644
--- a/gcc/config/i386/fma4intrin.h
+++ b/gcc/config/i386/fma4intrin.h
@@ -64,73 +64,73 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfnmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfnmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfnmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfnmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -148,13 +148,13 @@ _mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
 {
-  return (__m128) __builtin_ia32_vfmsubaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+  return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
 {
-  return (__m128d) __builtin_ia32_vfmsubaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+  return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
 /* 256b Floating point multiply/add type instructions.  */
@@ -174,37 +174,37 @@ extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artif
 _mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -222,13 +222,13 @@ _mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
 {
-  return (__m256) __builtin_ia32_vfmsubaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
+  return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
 {
-  return (__m256d) __builtin_ia32_vfmsubaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
+  return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index eada313..7da2cfb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22769,34 +22769,12 @@ enum ix86_builtins
   IX86_BUILTIN_VFMADDSD,
   IX86_BUILTIN_VFMADDPS,
   IX86_BUILTIN_VFMADDPD,
-  IX86_BUILTIN_VFMSUBSS,
-  IX86_BUILTIN_VFMSUBSD,
-  IX86_BUILTIN_VFMSUBPS,
-  IX86_BUILTIN_VFMSUBPD,
-  IX86_BUILTIN_VFMADDSUBPS,
-  IX86_BUILTIN_VFMADDSUBPD,
-  IX86_BUILTIN_VFMSUBADDPS,
-  IX86_BUILTIN_VFMSUBADDPD,
-  IX86_BUILTIN_VFNMADDSS,
-  IX86_BUILTIN_VFNMADDSD,
-  IX86_BUILTIN_VFNMADDPS,
-  IX86_BUILTIN_VFNMADDPD,
-  IX86_BUILTIN_VFNMSUBSS,
-  IX86_BUILTIN_VFNMSUBSD,
-  IX86_BUILTIN_VFNMSUBPS,
-  IX86_BUILTIN_VFNMSUBPD,
   IX86_BUILTIN_VFMADDPS256,
   IX86_BUILTIN_VFMADDPD256,
-  IX86_BUILTIN_VFMSUBPS256,
-  IX86_BUILTIN_VFMSUBPD256,
+  IX86_BUILTIN_VFMADDSUBPS,
+  IX86_BUILTIN_VFMADDSUBPD,
   IX86_BUILTIN_VFMADDSUBPS256,
   IX86_BUILTIN_VFMADDSUBPD256,
-  IX86_BUILTIN_VFMSUBADDPS256,
-  IX86_BUILTIN_VFMSUBADDPD256,
-  IX86_BUILTIN_VFNMADDPS256,
-  IX86_BUILTIN_VFNMADDPD256,
-  IX86_BUILTIN_VFNMSUBPS256,
-  IX86_BUILTIN_VFNMSUBPD256,
 
   IX86_BUILTIN_VPCMOV,
   IX86_BUILTIN_VPCMOV_V2DI,
@@ -23953,43 +23931,18 @@ static const struct builtin_description bdesc_args[] =
 
 static const struct builtin_description bdesc_multi_arg[] =
 {
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
-
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
-  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
+
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav8sf4, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fmav4df4, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
+
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
+  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
 
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
@@ -24043,8 +23996,8 @@ static const struct builtin_description bdesc_multi_arg[] =
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
-  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
-  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
+  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
+  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
 
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0d3856e..ae52746 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -199,9 +199,7 @@
   UNSPEC_PCMPISTR
 
   ;; For FMA4 support
-  UNSPEC_FMA4_INTRINSIC
-  UNSPEC_FMA4_FMADDSUB
-  UNSPEC_FMA4_FMSUBADD
+  UNSPEC_FMADDSUB
   UNSPEC_XOP_UNSIGNED_CMP
   UNSPEC_XOP_TRUEFALSE
   UNSPEC_XOP_PERMUTE
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 2be9903b..d6e1f12 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -55,7 +55,6 @@
 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
-(define_mode_iterator FMA4MODEF4 [V8SF V4DF])
 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
 
 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
@@ -70,6 +69,8 @@
 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
 
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
+
 ;; Int-float size matches
 (define_mode_iterator SSEMODE4S [V4SF V4SI])
 (define_mode_iterator SSEMODE2D [V2DF V2DI])
@@ -1767,698 +1768,237 @@
 ;;	(set (reg2) (mult (reg1) (mem (addr2))))
 ;;	(set (reg3) (plus (reg2) (mem (addr3))))
 
-(define_insn "fma4_fmadd<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-        (plus:FMA4MODEF4
-          (mult:FMA4MODEF4
-            (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-            (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
-          (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+;; Intrinsic FMA operations.
+
+(define_expand "fma<mode>4"
+  [(set (match_operand:FMAMODE 0 "register_operand")
+        (fma:FMAMODE
+          (match_operand:FMAMODE 1 "nonimmediate_operand")
+          (match_operand:FMAMODE 2 "nonimmediate_operand")
+          (match_operand:FMAMODE 3 "nonimmediate_operand")))]
+  "TARGET_FMA4"
+  "")
+
+(define_insn "*fma4i_fmadd_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+        (fma:FMAMODE
+          (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+          (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+          (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
+  "TARGET_FMA4"
   "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating multiply and subtract.
-(define_insn "fma4_fmsub<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-        (minus:FMA4MODEF4
-          (mult:FMA4MODEF4
-            (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-            (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
-          (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+(define_insn "*fma4i_fmsub_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+        (fma:FMAMODE
+          (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
+          (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+          (neg:FMAMODE
+            (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
+  "TARGET_FMA4"
   "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating point negative multiply and add.
-;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
-(define_insn "fma4_fnmadd<mode>4256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
-        (minus:FMA4MODEF4
-          (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
-          (mult:FMA4MODEF4
-            (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
-            (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
+(define_insn "*fma4i_fnmadd_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
+        (fma:FMAMODE
+          (neg:FMAMODE
+            (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
+          (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
+          (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
+  "TARGET_FMA4"
   "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-;; Floating point negative multiply and subtract.
-(define_insn "fma4_fnmsub4256" - [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") - (minus:FMA4MODEF4 - (mult:FMA4MODEF4 - (neg:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")) - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4 && TARGET_FUSED_MADD" +(define_insn "*fma4i_fnmsub_" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") + (fma:FMAMODE + (neg:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") + (neg:FMAMODE + (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))] + "TARGET_FMA4" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -(define_insn "fma4_fmadd4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") - (plus:SSEMODEF4 - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4 && TARGET_FUSED_MADD" - "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) +;; Scalar versions of the above. Unlike ADDSS et al, these write the +;; entire destination register, with the high-order elements zeroed. + +(define_expand "fma4i_vmfmadd_" + [(set (match_operand:SSEMODEF2P 0 "register_operand") + (vec_merge:SSEMODEF2P + (fma:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand") + (match_operand:SSEMODEF2P 3 "nonimmediate_operand")) + (match_dup 4) + (const_int 1)))] + "TARGET_FMA4" +{ + operands[4] = CONST0_RTX (mode); +}) -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are generated. -;; Scalar version of fmadd. -(define_insn "fma4_vmfmadd4" +(define_insn "*fma4i_vmfmadd_" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (vec_merge:SSEMODEF2P - (plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 0) - (const_int 1)))] - "TARGET_FMA4 && TARGET_FUSED_MADD" + (fma:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m") + (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) + (match_operand:SSEMODEF2P 4 "const0_operand" "") + (const_int 1)))] + "TARGET_FMA4" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -;; Floating multiply and subtract. -;; Allow two memory operands the same as fmadd. -(define_insn "fma4_fmsub4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") - (minus:SSEMODEF4 - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4 && TARGET_FUSED_MADD" - "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are generated. -;; Scalar version of fmsub. 
-(define_insn "fma4_vmfmsub4" +(define_insn "*fma4i_vmfmsub_" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 0) - (const_int 1)))] - "TARGET_FMA4 && TARGET_FUSED_MADD" + (fma:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m") + (neg:SSEMODEF2P + (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))) + (match_operand:SSEMODEF2P 4 "const0_operand" "") + (const_int 1)))] + "TARGET_FMA4" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -;; Floating point negative multiply and add. -;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b). -(define_insn "fma4_fnmadd4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") - (minus:SSEMODEF4 - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x") - (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))] - "TARGET_FMA4 && TARGET_FUSED_MADD" - "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are generated. -;; Scalar version of fnmadd. -(define_insn "fma4_vmfnmadd4" +(define_insn "*fma4i_vmfnmadd_" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))) - (match_dup 0) - (const_int 1)))] - "TARGET_FMA4 && TARGET_FUSED_MADD" + (fma:SSEMODEF2P + (neg:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m") + (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) + (match_operand:SSEMODEF2P 4 "const0_operand" "") + (const_int 1)))] + "TARGET_FMA4" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -;; Floating point negative multiply and subtract. -;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c. -(define_insn "fma4_fnmsub4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") - (minus:SSEMODEF4 - (mult:SSEMODEF4 - (neg:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")) - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4 && TARGET_FUSED_MADD" - "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are generated. -;; Scalar version of fnmsub. 
-(define_insn "fma4_vmfnmsub4" +(define_insn "*fma4i_vmfnmsub_" [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") (vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 0) - (const_int 1)))] - "TARGET_FMA4 && TARGET_FUSED_MADD" - "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "fma4i_fmadd4256" - [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") - (unspec:FMA4MODEF4 - [(plus:FMA4MODEF4 - (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" - "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "fma4i_fmsub4256" - [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") - (unspec:FMA4MODEF4 - [(minus:FMA4MODEF4 - (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" - "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "fma4i_fnmadd4256" - [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") - (unspec:FMA4MODEF4 - [(minus:FMA4MODEF4 - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x") - (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))] - UNSPEC_FMA4_INTRINSIC))] + (fma:SSEMODEF2P + (neg:SSEMODEF2P + (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" " x,m") + (neg:SSEMODEF2P + (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))) + (match_operand:SSEMODEF2P 4 "const0_operand" "") + (const_int 1)))] "TARGET_FMA4" - "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -(define_insn "fma4i_fnmsub4256" - [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") - (unspec:FMA4MODEF4 - [(minus:FMA4MODEF4 - (mult:FMA4MODEF4 - (neg:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")) - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" - "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) +;; Non-intrinsic versions, matched when fused-multiply-add is allowed. 
-(define_insn "fma4i_fmadd4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" +(define_insn "*fma4_fmadd_" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") + (plus:FMAMODE + (mult:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")) + (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && TARGET_FUSED_MADD" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -(define_insn "fma4i_fmsub4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" +;; Floating multiply and subtract. +(define_insn "*fma4_fmsub_" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") + (minus:FMAMODE + (mult:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")) + (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && TARGET_FUSED_MADD" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -(define_insn "fma4i_fnmadd4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" +;; Floating point negative multiply and add. +;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b). +(define_insn "*fma4_fnmadd_" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") + (minus:FMAMODE + (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x") + (mult:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m"))))] + "TARGET_FMA4 && TARGET_FUSED_MADD" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -(define_insn "fma4i_fnmsub4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" +;; Floating point negative multiply and subtract. 
+(define_insn "*fma4_fnmsub_" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") + (minus:FMAMODE + (mult:FMAMODE + (neg:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")) + (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && TARGET_FUSED_MADD" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) -;; For the scalar operations, use operand1 for the upper words that aren't -;; modified, so restrict the forms that are accepted. -(define_insn "fma4i_vmfmadd4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (plus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 0) - (const_int 1))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" - "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "fma4i_vmfmsub4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 0) - (const_int 1))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" - "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "fma4i_vmfnmadd4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") - (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))) - (match_dup 0) - (const_int 1))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" - "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "fma4i_vmfnmsub4" - [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") - (unspec:SSEMODEF2P - [(vec_merge:SSEMODEF2P - (minus:SSEMODEF2P - (mult:SSEMODEF2P - (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) - (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) - (match_dup 0) - (const_int 1))] - UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4" - "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; FMA4 Parallel floating point multiply addsub and subadd operations. 
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "fma4_fmaddsubv8sf4"
-  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
-        (vec_merge:V8SF
-          (plus:V8SF
-            (mult:V8SF
-              (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V8SF
-            (mult:V8SF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 170)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V8SF")])
-
-(define_insn "fma4_fmaddsubv4df4"
-  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
-        (vec_merge:V4DF
-          (plus:V4DF
-            (mult:V4DF
-              (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V4DF
-            (mult:V4DF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 10)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "fma4_fmaddsubv4sf4"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-        (vec_merge:V4SF
-          (plus:V4SF
-            (mult:V4SF
-              (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V4SF
-            (mult:V4SF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 10)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "fma4_fmaddsubv2df4"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-        (vec_merge:V2DF
-          (plus:V2DF
-            (mult:V2DF
-              (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V2DF
-            (mult:V2DF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 2)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "fma4_fmsubaddv8sf4"
-  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
-        (vec_merge:V8SF
-          (plus:V8SF
-            (mult:V8SF
-              (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V8SF
-            (mult:V8SF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 85)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V8SF")])
-
-(define_insn "fma4_fmsubaddv4df4"
-  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
-        (vec_merge:V4DF
-          (plus:V4DF
-            (mult:V4DF
-              (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V4DF
-            (mult:V4DF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 5)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "fma4_fmsubaddv4sf4"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-        (vec_merge:V4SF
-          (plus:V4SF
-            (mult:V4SF
-              (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V4SF
-            (mult:V4SF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 5)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "fma4_fmsubaddv2df4"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-        (vec_merge:V2DF
-          (plus:V2DF
-            (mult:V2DF
-              (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
-              (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
-            (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
-          (minus:V2DF
-            (mult:V2DF
-              (match_dup 1)
-              (match_dup 2))
-            (match_dup 3))
-          (const_int 1)))]
-  "TARGET_FMA4 && TARGET_FUSED_MADD"
-  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V2DF")])
+;; It would be possible to represent these without the UNSPEC as
+;;
+;; (vec_merge
+;;   (fma op1 op2 op3)
+;;   (fma op1 op2 (neg op3))
+;;   (merge-const))
+;;
+;; But this doesn't seem useful in practice.
 
-(define_insn "fma4i_fmaddsubv8sf4"
-  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
-        (unspec:V8SF
-          [(vec_merge:V8SF
-             (plus:V8SF
-               (mult:V8SF
-                 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
-                 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
-               (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
-             (minus:V8SF
-               (mult:V8SF
-                 (match_dup 1)
-                 (match_dup 2))
-               (match_dup 3))
-             (const_int 170))]
-          UNSPEC_FMA4_INTRINSIC))]
+(define_insn "fma4i_fmaddsub_<mode>"
+  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
+        (unspec:AVXMODEF2P
+          [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
+           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
+           (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")]
+          UNSPEC_FMADDSUB))]
   "TARGET_FMA4"
   "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V8SF")])
 
-(define_insn "fma4i_fmaddsubv4df4"
-  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
-        (unspec:V4DF
-          [(vec_merge:V4DF
-             (plus:V4DF
-               (mult:V4DF
-                 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
-                 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
-               (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
-             (minus:V4DF
-               (mult:V4DF
-                 (match_dup 1)
-                 (match_dup 2))
-               (match_dup 3))
-             (const_int 10))]
-          UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "fma4i_fmaddsubv4sf4"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-        (unspec:V4SF
-          [(vec_merge:V4SF
-             (plus:V4SF
-               (mult:V4SF
-                 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
-               (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
-             (minus:V4SF
-               (mult:V4SF
-                 (match_dup 1)
-                 (match_dup 2))
-               (match_dup 3))
-             (const_int 10))]
-          UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "fma4i_fmaddsubv2df4"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-        (unspec:V2DF
-          [(vec_merge:V2DF
-             (plus:V2DF
-               (mult:V2DF
-                 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
-               (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
-             (minus:V2DF
-               (mult:V2DF
-                 (match_dup 1)
-                 (match_dup 2))
-               (match_dup 3))
-             (const_int 2))]
-          UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "fma4i_fmsubaddv8sf4"
-  [(set (match_operand:V8SF 0 "register_operand" "=x,x")
-        (unspec:V8SF
-          [(vec_merge:V8SF
-             (plus:V8SF
-               (mult:V8SF
-                 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
-                 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
-               (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
-             (minus:V8SF
-               (mult:V8SF
-                 (match_dup 1)
-                 (match_dup 2))
-               (match_dup 3))
-             (const_int 85))]
-          UNSPEC_FMA4_INTRINSIC))]
+(define_insn "*fma4i_fmsubadd_<mode>"
+  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
+        (unspec:AVXMODEF2P
+          [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x,x")
+           (match_operand:AVXMODEF2P 2 "nonimmediate_operand" " x,m")
+           (neg:AVXMODEF2P
+             (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x"))]
+          UNSPEC_FMADDSUB))]
   "TARGET_FMA4"
   "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "V8SF")])
 
-(define_insn "fma4i_fmsubaddv4df4"
-  [(set (match_operand:V4DF 0 "register_operand" "=x,x")
-        (unspec:V4DF
-          [(vec_merge:V4DF
-             (plus:V4DF
-               (mult:V4DF
-                 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
-                 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
-               (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
-             (minus:V4DF
-               (mult:V4DF
-                 (match_dup 1)
-                 (match_dup 2))
-               (match_dup 3))
-             (const_int 5))]
-          UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4DF")])
-
-(define_insn "fma4i_fmsubaddv4sf4"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-        (unspec:V4SF
-          [(vec_merge:V4SF
-             (plus:V4SF
-               (mult:V4SF
-                 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
-               (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
-             (minus:V4SF
-               (mult:V4SF
-                 (match_dup 1)
-                 (match_dup 2))
-               (match_dup 3))
-             (const_int 5))]
-          UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "fma4i_fmsubaddv2df4"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-        (unspec:V2DF
-          [(vec_merge:V2DF
-             (plus:V2DF
-               (mult:V2DF
-                 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
-               (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
-             (minus:V2DF
-               (mult:V2DF
-                 (match_dup 1)
-                 (match_dup 2))
-               (match_dup 3))
-             (const_int 1))]
-          UNSPEC_FMA4_INTRINSIC))]
-  "TARGET_FMA4"
-  "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "V2DF")])
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point conversion operations
@@ -11405,11 +10945,10 @@
 })
 
 ;; XOP FRCZ support
-;; parallel insns
 (define_insn "xop_frcz<mode>2"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
-        (unspec:SSEMODEF2P
-          [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
+        (unspec:FMAMODE
+          [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
           UNSPEC_FRCZ))]
   "TARGET_XOP"
   "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
@@ -11417,26 +10956,29 @@
   (set_attr "mode" "<MODE>")])
 
 ;; scalar insns
-(define_insn "xop_vmfrcz<mode>2"
-  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+(define_expand "xop_vmfrcz<mode>2"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand")
        (vec_merge:SSEMODEF2P
          (unspec:SSEMODEF2P
-           [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+           [(match_operand:SSEMODEF2P 1 "nonimmediate_operand")]
            UNSPEC_FRCZ)
-         (match_operand:SSEMODEF2P 1 "register_operand" "0")
+         (match_dup 3)
         (const_int 1)))]
   "TARGET_XOP"
-  "vfrcz<ssescalarmodesuffix>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt1")
-   (set_attr "mode" "<MODE>")])
+{
+  operands[3] = CONST0_RTX (<MODE>mode);
+})
 
-(define_insn "xop_frcz<mode>2256"
-  [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
-        (unspec:FMA4MODEF4
-          [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
-          UNSPEC_FRCZ))]
+(define_insn "*xop_vmfrcz_<mode>"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+        (vec_merge:SSEMODEF2P
+          (unspec:SSEMODEF2P
+            [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
+            UNSPEC_FRCZ)
+          (match_operand:SSEMODEF2P 2 "const0_operand")
+          (const_int 1)))]
   "TARGET_XOP"
-  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
+  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt1")
    (set_attr "mode" "<MODE>")])
-- 
2.7.4