+2008-08-28 H.J. Lu <hongjiu.lu@intel.com>
+ Joey Ye <joey.ye@intel.com>
+ Xuepeng Guo <xuepeng.guo@intel.com>
+
+ * config.gcc (extra_headers): Add gmmintrin.h for x86 and x86-64.
+
+ * config/i386/cpuid.h (bit_FMA): New.
+ (bit_XSAVE): Likewise.
+ (bit_OSXSAVE): Likewise.
+ (bit_AVX): Likewise.
+
+ * config/i386/gas.h (ASM_OUTPUT_OPCODE): Undefine before
+ define. Use ASM_OUTPUT_AVX_PREFIX.
+
+ * config/i386/gmmintrin.h: New.
+
+ * config/i386/i386.c (x86_64_reg_class): Add X86_64_AVX_CLASS.
+ (OPTION_MASK_ISA_AVX_SET): New.
+ (OPTION_MASK_ISA_FMA_SET): Likewise.
+ (OPTION_MASK_ISA_AVX_UNSET): Likewise.
+ (OPTION_MASK_ISA_FMA_UNSET): Likewise.
+ (OPTION_MASK_ISA_SSE4_2_UNSET): Updated.
+ (ix86_handle_option): Handle OPT_mavx and OPT_mfma.
+ (pta_flags): Add PTA_AVX and PTA_FMA.
+ (override_options): Handle PTA_AVX and PTA_FMA.
+ (init_cumulative_args): Handle warn_avx.
+ (classify_argument): Return 0 for COImode and OImode. Return
+ 1 and X86_64_AVX_CLASS for 256bit vector types.
+ (examine_argument): Handle X86_64_AVX_CLASS.
+ (construct_container): Likewise.
+ (function_arg_advance_32): Pass OImode and 256bit vector types
+ in AVX register.
+ (function_arg_advance_64): Take a new argument to indicate if a
+ parameter is named. Handle 256bit vector types. Return
+ immediately for unnamed 256bit vector mode parameters.
+ (function_arg_advance): Updated.
+ (function_arg_32): Add comments for TImode. Handle OImode
+ and 256bit vector types.
+ (function_arg_64): Take a new argument to indicate if a
+ parameter is named. Handle 256bit vector types. Return NULL
+ for unnamed 256bit vector mode parameters.
+ (function_arg): Updated.
+ (setup_incoming_varargs_64): Support
+ AVX encoding for *sse_prologue_save_insn.
+ (ix86_gimplify_va_arg): Handle 256bit vector mode parameters.
+ (standard_sse_constant_p): Return -2 for all 1s if SSE2 isn't
+ enabled. For all 1s in 256bit vector modes, return 3 if AVX is
+ enabled, otherwise return -3.
+ (standard_sse_constant_opcode): Handle AVX and 256bit vector
+ modes.
+ (print_reg): Support AVX registers. Handle 'x' and 't'.
+ Handle 'd' to duplicate the operand.
+ (print_operand): Likewise. Also support AVX vector compare
+ instructions.
+ (output_387_binary_op): Support AVX.
+ (output_fp_compare): Likewise.
+ (ix86_expand_vector_move_misalign): Likewise.
+ (ix86_attr_length_vex_default): New.
+ (ix86_builtins): Add IX86_BUILTIN_ADDPD256,
+ IX86_BUILTIN_ADDPS256, IX86_BUILTIN_ADDSUBPD256,
+ IX86_BUILTIN_ADDSUBPS256, IX86_BUILTIN_ANDPD256,
+ IX86_BUILTIN_ANDPS256, IX86_BUILTIN_ANDNPD256,
+ IX86_BUILTIN_ANDNPS256, IX86_BUILTIN_BLENDPD256,
+ IX86_BUILTIN_BLENDPS256, IX86_BUILTIN_BLENDVPD256,
+ IX86_BUILTIN_BLENDVPS256, IX86_BUILTIN_DIVPD256,
+ IX86_BUILTIN_DIVPS256, IX86_BUILTIN_DPPS256,
+ IX86_BUILTIN_HADDPD256, IX86_BUILTIN_HADDPS256,
+ IX86_BUILTIN_HSUBPD256, IX86_BUILTIN_HSUBPS256,
+ IX86_BUILTIN_MAXPD256, IX86_BUILTIN_MAXPS256,
+ IX86_BUILTIN_MINPD256, IX86_BUILTIN_MINPS256,
+ IX86_BUILTIN_MULPD256, IX86_BUILTIN_MULPS256,
+ IX86_BUILTIN_ORPD256, IX86_BUILTIN_ORPS256,
+ IX86_BUILTIN_SHUFPD256, IX86_BUILTIN_SHUFPS256,
+ IX86_BUILTIN_SUBPD256, IX86_BUILTIN_SUBPS256,
+ IX86_BUILTIN_XORPD256, IX86_BUILTIN_XORPS256,
+ IX86_BUILTIN_CMPSD, IX86_BUILTIN_CMPSS, IX86_BUILTIN_CMPPD,
+ IX86_BUILTIN_CMPPS, IX86_BUILTIN_CMPPD256,
+ IX86_BUILTIN_CMPPS256, IX86_BUILTIN_CVTDQ2PD256,
+ IX86_BUILTIN_CVTDQ2PS256, IX86_BUILTIN_CVTPD2PS256,
+ IX86_BUILTIN_CVTPS2DQ256, IX86_BUILTIN_CVTPS2PD256,
+ IX86_BUILTIN_CVTTPD2DQ256, IX86_BUILTIN_CVTPD2DQ256,
+ IX86_BUILTIN_CVTTPS2DQ256, IX86_BUILTIN_EXTRACTF128PD256,
+ IX86_BUILTIN_EXTRACTF128PS256, IX86_BUILTIN_EXTRACTF128SI256,
+ IX86_BUILTIN_VZEROALL, IX86_BUILTIN_VZEROUPPER,
+ IX86_BUILTIN_VZEROUPPER_REX64, IX86_BUILTIN_VPERMILVARPD,
+ IX86_BUILTIN_VPERMILVARPS, IX86_BUILTIN_VPERMILVARPD256,
+ IX86_BUILTIN_VPERMILVARPS256, IX86_BUILTIN_VPERMILPD,
+ IX86_BUILTIN_VPERMILPS, IX86_BUILTIN_VPERMILPD256,
+ IX86_BUILTIN_VPERMILPS256, IX86_BUILTIN_VPERMIL2PD,
+ IX86_BUILTIN_VPERMIL2PS, IX86_BUILTIN_VPERMIL2PD256,
+ IX86_BUILTIN_VPERMIL2PS256, IX86_BUILTIN_VPERM2F128PD256,
+ IX86_BUILTIN_VPERM2F128PS256, IX86_BUILTIN_VPERM2F128SI256,
+ IX86_BUILTIN_VBROADCASTSS, IX86_BUILTIN_VBROADCASTSD256,
+ IX86_BUILTIN_VBROADCASTSS256, IX86_BUILTIN_VBROADCASTPD256,
+ IX86_BUILTIN_VBROADCASTPS256, IX86_BUILTIN_VINSERTF128PD256,
+ IX86_BUILTIN_VINSERTF128PS256, IX86_BUILTIN_VINSERTF128SI256,
+ IX86_BUILTIN_LOADUPD256, IX86_BUILTIN_LOADUPS256,
+ IX86_BUILTIN_STOREUPD256, IX86_BUILTIN_STOREUPS256,
+ IX86_BUILTIN_LDDQU256, IX86_BUILTIN_LOADDQU256,
+ IX86_BUILTIN_STOREDQU256, IX86_BUILTIN_MASKLOADPD,
+ IX86_BUILTIN_MASKLOADPS, IX86_BUILTIN_MASKSTOREPD,
+ IX86_BUILTIN_MASKSTOREPS, IX86_BUILTIN_MASKLOADPD256,
+ IX86_BUILTIN_MASKLOADPS256, IX86_BUILTIN_MASKSTOREPD256,
+ IX86_BUILTIN_MASKSTOREPS256, IX86_BUILTIN_MOVSHDUP256,
+ IX86_BUILTIN_MOVSLDUP256, IX86_BUILTIN_MOVDDUP256,
+ IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS256,
+ IX86_BUILTIN_SQRTPS_NR256, IX86_BUILTIN_RSQRTPS256,
+ IX86_BUILTIN_RSQRTPS_NR256, IX86_BUILTIN_RCPPS256,
+ IX86_BUILTIN_ROUNDPD256, IX86_BUILTIN_ROUNDPS256,
+ IX86_BUILTIN_UNPCKHPD256, IX86_BUILTIN_UNPCKLPD256,
+ IX86_BUILTIN_UNPCKHPS256, IX86_BUILTIN_UNPCKLPS256,
+ IX86_BUILTIN_SI256_SI, IX86_BUILTIN_PS256_PS,
+ IX86_BUILTIN_PD256_PD, IX86_BUILTIN_SI_SI256,
+ IX86_BUILTIN_PS_PS256, IX86_BUILTIN_PD_PD256,
+ IX86_BUILTIN_VTESTZPD, IX86_BUILTIN_VTESTCPD,
+ IX86_BUILTIN_VTESTNZCPD, IX86_BUILTIN_VTESTZPS,
+ IX86_BUILTIN_VTESTCPS, IX86_BUILTIN_VTESTNZCPS,
+ IX86_BUILTIN_VTESTZPD256, IX86_BUILTIN_VTESTCPD256,
+ IX86_BUILTIN_VTESTNZCPD256, IX86_BUILTIN_VTESTZPS256,
+ IX86_BUILTIN_VTESTCPS256, IX86_BUILTIN_VTESTNZCPS256,
+ IX86_BUILTIN_PTESTZ256, IX86_BUILTIN_PTESTC256,
+ IX86_BUILTIN_PTESTNZC256, IX86_BUILTIN_MOVMSKPD256
+ and IX86_BUILTIN_MOVMSKPS256.
+ (ix86_special_builtin_type): Add V32QI_FTYPE_PCCHAR,
+ V8SF_FTYPE_PCV4SF, V8SF_FTYPE_PCFLOAT, V4DF_FTYPE_PCV2DF,
+ V4DF_FTYPE_PCDOUBLE, V8SF_FTYPE_PCV8SF_V8SF,
+ V4DF_FTYPE_PCV4DF_V4DF, V4SF_FTYPE_PCV4SF_V4SF,
+ V2DF_FTYPE_PCV2DF_V2DF, VOID_FTYPE_PCHAR_V32QI,
+ VOID_FTYPE_PFLOAT_V8SF, VOID_FTYPE_PDOUBLE_V4DF,
+ VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF,
+ VOID_FTYPE_PV4SF_V4SF_V4SF and VOID_FTYPE_PV2DF_V2DF_V2DF.
+ (ix86_builtin_type): Add INT_FTYPE_V8SF_V8SF_PTEST,
+ INT_FTYPE_V4DI_V4DI_PTEST, INT_FTYPE_V4DF_V4DF_PTEST,
+ INT_FTYPE_V4SF_V4SF_PTEST, INT_FTYPE_V2DF_V2DF_PTEST,
+ INT_FTYPE_V8SF, INT_FTYPE_V4DF, V8SI_FTYPE_V8SF, V8SI_FTYPE_V4SI,
+ V8SF_FTYPE_V8SF, V8SF_FTYPE_V8SI, V8SF_FTYPE_V4SF,
+ V4SI_FTYPE_V8SI, V4SI_FTYPE_V4DF, V4DF_FTYPE_V4DF,
+ V4DF_FTYPE_V4SI, V4DF_FTYPE_V4SF, V4DF_FTYPE_V2DF,
+ V4SF_FTYPE_V4DF, V4SF_FTYPE_V8SF, V2DF_FTYPE_V4DF,
+ V8SF_FTYPE_V8SF_V8SF, V8SF_FTYPE_V8SF_V8SI,
+ V4DF_FTYPE_V4DF_V4DF, V4DF_FTYPE_V4DF_V4DI,
+ V4SF_FTYPE_V4SF_V4SI, V2DF_FTYPE_V2DF_V2DI,
+ V8SF_FTYPE_V8SF_INT, V4SI_FTYPE_V8SI_INT, V4SF_FTYPE_V8SF_INT,
+ V2DF_FTYPE_V4DF_INT, V4DF_FTYPE_V4DF_INT,
+ V8SF_FTYPE_V8SF_V8SF_V8SF, V4DF_FTYPE_V4DF_V4DF_V4DF,
+ V8SI_FTYPE_V8SI_V8SI_INT, V8SF_FTYPE_V8SF_V8SF_INT,
+ V4DF_FTYPE_V4DF_V4DF_INT, V4DF_FTYPE_V4DF_V2DF_INT,
+ V8SF_FTYPE_V8SF_V8SF_V8SI_INT, V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
+ V4SF_FTYPE_V4SF_V4SF_V4SI_INT and V2DF_FTYPE_V2DF_V2DF_V2DI_INT.
+ (bdesc_special_args): Add IX86_BUILTIN_VZEROALL,
+ IX86_BUILTIN_VZEROUPPER, IX86_BUILTIN_VZEROUPPER_REX64,
+ IX86_BUILTIN_VBROADCASTSS, IX86_BUILTIN_VBROADCASTSD256,
+ IX86_BUILTIN_VBROADCASTSS256, IX86_BUILTIN_VBROADCASTPD256,
+ IX86_BUILTIN_VBROADCASTPS256, IX86_BUILTIN_LOADUPD256,
+ IX86_BUILTIN_LOADUPS256, IX86_BUILTIN_STOREUPD256,
+ IX86_BUILTIN_STOREUPS256, IX86_BUILTIN_LOADDQU256,
+ IX86_BUILTIN_STOREDQU256, IX86_BUILTIN_LDDQU256,
+ IX86_BUILTIN_MASKLOADPD, IX86_BUILTIN_MASKLOADPS,
+ IX86_BUILTIN_MASKLOADPD256, IX86_BUILTIN_MASKLOADPS256,
+ IX86_BUILTIN_MASKSTOREPD, IX86_BUILTIN_MASKSTOREPS,
+ IX86_BUILTIN_MASKSTOREPD256 and IX86_BUILTIN_MASKSTOREPS256.
+ (ix86_builtins): Add IX86_BUILTIN_ADDPD256,
+ IX86_BUILTIN_ADDPS256, IX86_BUILTIN_ADDSUBPD256,
+ IX86_BUILTIN_ADDSUBPS256, IX86_BUILTIN_ANDPD256,
+ IX86_BUILTIN_ANDPS256, IX86_BUILTIN_ANDNPD256,
+ IX86_BUILTIN_ANDNPS256, IX86_BUILTIN_DIVPD256,
+ IX86_BUILTIN_DIVPS256, IX86_BUILTIN_HADDPD256,
+ IX86_BUILTIN_HSUBPS256, IX86_BUILTIN_HSUBPD256,
+ IX86_BUILTIN_HADDPS256, IX86_BUILTIN_MAXPD256,
+ IX86_BUILTIN_MAXPS256, IX86_BUILTIN_MINPD256,
+ IX86_BUILTIN_MINPS256, IX86_BUILTIN_MULPD256,
+ IX86_BUILTIN_MULPS256, IX86_BUILTIN_ORPD256,
+ IX86_BUILTIN_ORPS256, IX86_BUILTIN_SUBPD256,
+ IX86_BUILTIN_SUBPS256, IX86_BUILTIN_XORPD256,
+ IX86_BUILTIN_XORPS256, IX86_BUILTIN_VPERMILVARPD,
+ IX86_BUILTIN_VPERMILVARPS, IX86_BUILTIN_VPERMILVARPD256,
+ IX86_BUILTIN_VPERMILVARPS256, IX86_BUILTIN_BLENDPD256,
+ IX86_BUILTIN_BLENDPS256, IX86_BUILTIN_BLENDVPD256,
+ IX86_BUILTIN_BLENDVPS256, IX86_BUILTIN_DPPS256,
+ IX86_BUILTIN_SHUFPD256, IX86_BUILTIN_SHUFPS256,
+ IX86_BUILTIN_CMPSD, IX86_BUILTIN_CMPSS, IX86_BUILTIN_CMPPD,
+ IX86_BUILTIN_CMPPS,
+ IX86_BUILTIN_CMPPD256, IX86_BUILTIN_CMPPS256,
+ IX86_BUILTIN_EXTRACTF128PD256, IX86_BUILTIN_EXTRACTF128PS256,
+ IX86_BUILTIN_EXTRACTF128SI256, IX86_BUILTIN_CVTDQ2PD256,
+ IX86_BUILTIN_CVTDQ2PS256, IX86_BUILTIN_CVTPD2PS256,
+ IX86_BUILTIN_CVTPS2DQ256, IX86_BUILTIN_CVTPS2PD256,
+ IX86_BUILTIN_CVTTPD2DQ256, IX86_BUILTIN_CVTPD2DQ256,
+ IX86_BUILTIN_CVTTPS2DQ256, IX86_BUILTIN_VPERM2F128PD256,
+ IX86_BUILTIN_VPERM2F128PS256, IX86_BUILTIN_VPERM2F128SI256,
+ IX86_BUILTIN_VPERMILPD, IX86_BUILTIN_VPERMILPS,
+ IX86_BUILTIN_VPERMILPD256, IX86_BUILTIN_VPERMILPS256,
+ IX86_BUILTIN_VPERMIL2PD, IX86_BUILTIN_VPERMIL2PS,
+ IX86_BUILTIN_VPERMIL2PD256, IX86_BUILTIN_VPERMIL2PS256,
+ IX86_BUILTIN_VINSERTF128PD256, IX86_BUILTIN_VINSERTF128PS256,
+ IX86_BUILTIN_VINSERTF128SI256, IX86_BUILTIN_MOVSHDUP256,
+ IX86_BUILTIN_MOVSLDUP256, IX86_BUILTIN_MOVDDUP256,
+ IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS256,
+ IX86_BUILTIN_SQRTPS_NR256, IX86_BUILTIN_RSQRTPS256,
+ IX86_BUILTIN_RSQRTPS_NR256, IX86_BUILTIN_RCPPS256,
+ IX86_BUILTIN_ROUNDPD256, IX86_BUILTIN_ROUNDPS256,
+ IX86_BUILTIN_UNPCKHPD256, IX86_BUILTIN_UNPCKLPD256,
+ IX86_BUILTIN_UNPCKHPS256, IX86_BUILTIN_UNPCKLPS256,
+ IX86_BUILTIN_SI256_SI, IX86_BUILTIN_PS256_PS,
+ IX86_BUILTIN_PD256_PD, IX86_BUILTIN_SI_SI256,
+ IX86_BUILTIN_PS_PS256, IX86_BUILTIN_PD_PD256,
+ IX86_BUILTIN_VTESTZPD, IX86_BUILTIN_VTESTCPD,
+ IX86_BUILTIN_VTESTNZCPD, IX86_BUILTIN_VTESTZPS,
+ IX86_BUILTIN_VTESTCPS, IX86_BUILTIN_VTESTNZCPS,
+ IX86_BUILTIN_VTESTZPD256, IX86_BUILTIN_VTESTCPD256,
+ IX86_BUILTIN_VTESTNZCPD256, IX86_BUILTIN_VTESTZPS256,
+ IX86_BUILTIN_VTESTCPS256, IX86_BUILTIN_VTESTNZCPS256,
+ IX86_BUILTIN_PTESTZ256, IX86_BUILTIN_PTESTC256,
+ IX86_BUILTIN_PTESTNZC256, IX86_BUILTIN_MOVMSKPD256 and
+ IX86_BUILTIN_MOVMSKPS256.
+ (ix86_init_mmx_sse_builtins): Support AVX builtins.
+ (ix86_expand_args_builtin): Likewise.
+ (ix86_expand_special_args_builtin): Likewise.
+ (ix86_hard_regno_mode_ok): Handle AVX modes.
+ (ix86_expand_vector_init_duplicate): Likewise.
+ (ix86_expand_vector_init_one_nonzero): Likewise.
+ (ix86_expand_vector_init_one_var): Likewise.
+ (ix86_expand_vector_init_concat): Likewise.
+ (ix86_expand_vector_init_general): Likewise.
+ (ix86_expand_vector_set): Likewise.
+ (ix86_vector_mode_supported_p): Likewise.
+ (x86_extended_reg_mentioned_p): Check INSN_P before using
+ PATTERN.
+
+ * config/i386/i386-c.c (ix86_target_macros_internal): Handle
+ OPTION_MASK_ISA_AVX and OPTION_MASK_ISA_FMA.
+
+ * config/i386/i386.h (TARGET_AVX): New.
+ (TARGET_FMA): Likewise.
+ (TARGET_CPU_CPP_BUILTINS): Handle TARGET_AVX and TARGET_FMA.
+ (BIGGEST_ALIGNMENT): Set to 256 for TARGET_AVX.
+ (VALID_AVX256_REG_MODE): New.
+ (AVX_FLOAT_MODE_P): Likewise.
+ (AVX128_VEC_FLOAT_MODE_P): Likewise.
+ (AVX256_VEC_FLOAT_MODE_P): Likewise.
+ (AVX_VEC_FLOAT_MODE_P): Likewise.
+ (ASM_OUTPUT_AVX_PREFIX): Likewise.
+ (ASM_OUTPUT_OPCODE): Likewise.
+ (UNITS_PER_SIMD_WORD): Add a FIXME for 32byte vectorizer
+ support.
+ (SSE_REG_MODE_P): Allow 256bit vector modes.
+ (ix86_args): Add a warn_avx field.
+
+ * config/i386/i386.md (UNSPEC_PCMP): New.
+ (UNSPEC_VPERMIL): Likewise.
+ (UNSPEC_VPERMIL2): Likewise.
+ (UNSPEC_VPERMIL2F128): Likewise.
+ (UNSPEC_MASKLOAD): Likewise.
+ (UNSPEC_MASKSTORE): Likewise.
+ (UNSPEC_CAST): Likewise.
+ (UNSPEC_VTESTP): Likewise.
+ (UNSPECV_VZEROALL): Likewise.
+ (UNSPECV_VZEROUPPER): Likewise.
+ (XMM0_REG): Likewise.
+ (XMM1_REG): Likewise.
+ (XMM2_REG): Likewise.
+ (XMM3_REG): Likewise.
+ (XMM4_REG): Likewise.
+ (XMM5_REG): Likewise.
+ (XMM6_REG): Likewise.
+ (XMM7_REG): Likewise.
+ (XMM8_REG): Likewise.
+ (XMM9_REG): Likewise.
+ (XMM10_REG): Likewise.
+ (XMM11_REG): Likewise.
+ (XMM12_REG): Likewise.
+ (XMM13_REG): Likewise.
+ (XMM14_REG): Likewise.
+ (XMM15_REG): Likewise.
+ (prefix): Likewise.
+ (prefix_vex_imm8): Likewise.
+ (prefix_vex_w): Likewise.
+ (length_vex): Likewise.
+ (maxmin): Likewise.
+ (movoi): Likewise.
+ (*avx_ashlti3): Likewise.
+ (*avx_lshrti3): Likewise.
+ (*avx_setcc<mode>): Likewise.
+ (*fop_<mode>_comm_mixed_avx): Likewise.
+ (*fop_<mode>_comm_avx): Likewise.
+ (*fop_<mode>_1_mixed_avx): Likewise.
+ (*fop_<mode>_1_avx): Likewise.
+ (*avx_<code><mode>3): Likewise.
+ (*avx_ieee_smin<mode>3): Likewise.
+ (*avx_ieee_smax<mode>3): Likewise.
+ (mode): Add OI, V8SF and V4DF.
+ (length): Support VEX prefix.
+ (*cmpfp_i_mixed): Set prefix attribute.
+ (*cmpfp_i_sse): Likewise.
+ (*cmpfp_iu_mixed): Likewise.
+ (*cmpfp_iu_sse): Likewise.
+ (*movsi_1): Support AVX.
+ (*movdi_2): Likewise.
+ (*movdi_1_rex64): Likewise.
+ (*movti_internal): Likewise.
+ (*movti_rex64): Likewise.
+ (*movsf_1): Likewise.
+ (*movdf_nointeger): Likewise.
+ (*movdf_integer_rex64): Likewise.
+ (*movtf_internal): Likewise.
+ (zero_extendsidi2_32): Likewise.
+ (zero_extendsidi2_rex64): Likewise.
+ (*extendsfdf2_mixed): Likewise.
+ (*extendsfdf2_sse): Likewise.
+ (*truncdfsf_fast_mixed): Likewise.
+ (*truncdfsf_fast_sse): Likewise.
+ (*truncdfsf_mixed): Likewise.
+ (fix_trunc<mode>di_sse): Likewise.
+ (fix_trunc<mode>si_sse): Likewise.
+ (*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit): Likewise.
+ (*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit): Likewise.
+ (*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit): Likewise.
+ (*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit): Likewise.
+ (*rcpsf2_sse): Likewise.
+ (*rsqrtsf2_sse): Likewise.
+ (*sqrt<mode>2_sse): Likewise.
+ (sse4_1_round<mode>2): Likewise.
+ (*sse_prologue_save_insn): Disallow REX prefix for AVX.
+ Support AVX. Set length attribute properly for AVX.
+
+ * config/i386/i386-modes.def (VECTOR_MODES (INT, 32)): New.
+ (VECTOR_MODES (FLOAT, 32)): Likewise.
+ (VECTOR_MODE (INT, DI, 8)): Likewise.
+ (VECTOR_MODE (INT, HI, 32)): Likewise.
+ (VECTOR_MODE (INT, QI, 64)): Likewise.
+ (VECTOR_MODE (FLOAT, DF, 8)): Likewise.
+ (VECTOR_MODE (FLOAT, SF, 16)): Likewise.
+ (VECTOR_MODE (INT, DI, 4)): Removed.
+ (VECTOR_MODE (INT, SI, 8)): Likewise.
+ (VECTOR_MODE (INT, HI, 16)): Likewise.
+ (VECTOR_MODE (INT, QI, 32)): Likewise.
+ (VECTOR_MODE (FLOAT, SF, 8)): Likewise.
+ (INT_MODE (OI, 32)): Likewise.
+
+ * config/i386/i386.opt (mavx): New.
+ (mfma): Likewise.
+
+ * config/i386/i386-protos.h (ix86_attr_length_vex_default): New.
+
+ * config/i386/mmx.md (*mov<mode>_internal_rex64): Support AVX.
+ (*mov<mode>_internal_avx): New.
+ (*movv2sf_internal_rex64_avx): Likewise.
+ (*movv2sf_internal_avx): Likewise.
+
+ * config/i386/predicates.md (const_4_to_5_operand): New.
+ (const_6_to_7_operand): Likewise.
+ (const_8_to_11_operand): Likewise.
+ (const_12_to_15_operand): Likewise.
+ (avx_comparison_float_operator): Likewise.
+
+ * config/i386/sse.md (AVX256MODEI): New.
+ (AVX256MODE): Likewise.
+ (AVXMODEQI): Likewise.
+ (AVXMODE): Likewise.
+ (AVX256MODEF2P): Likewise.
+ (AVX256MODE2P): Likewise.
+ (AVX256MODE4P): Likewise.
+ (AVX256MODE8P): Likewise.
+ (AVXMODEF2P): Likewise.
+ (AVXMODEF4P): Likewise.
+ (AVXMODEDCVTDQ2PS): Likewise.
+ (AVXMODEDCVTPS2DQ): Likewise.
+ (avxvecmode): Likewise.
+ (avxvecpsmode): Likewise.
+ (avxhalfvecmode): Likewise.
+ (avxscalarmode): Likewise.
+ (avxcvtvecmode): Likewise.
+ (avxpermvecmode): Likewise.
+ (avxmodesuffixf2c): Likewise.
+ (avxmodesuffixp): Likewise.
+ (avxmodesuffixs): Likewise.
+ (avxmodesuffix): Likewise.
+ (vpermilbits): Likewise.
+ (pinsrbits): Likewise.
+ (mov<mode>): Likewise.
+ (*mov<mode>_internal): Likewise.
+ (push<mode>1): Likewise.
+ (movmisalign<mode>): Likewise.
+ (avx_movup<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (avx_movdqu<avxmodesuffix>): Likewise.
+ (avx_lddqu<avxmodesuffix>): Likewise.
+ (<plusminus_insn><mode>3): Likewise.
+ (*avx_<plusminus_insn><mode>3): Likewise.
+ (*avx_vm<plusminus_insn><mode>3): Likewise.
+ (mul<mode>3): Likewise.
+ (*avx_mul<mode>3): Likewise.
+ (*avx_vmmul<mode>3): Likewise.
+ (divv8sf3): Likewise.
+ (divv4df3): Likewise.
+ (avx_div<mode>3): Likewise.
+ (*avx_div<mode>3): Likewise.
+ (*avx_vmdiv<mode>3): Likewise.
+ (avx_rcpv8sf2): Likewise.
+ (*avx_vmrcpv4sf2): Likewise.
+ (sqrtv8sf2): Likewise.
+ (avx_sqrtv8sf2): Likewise.
+ (*avx_vmsqrt<mode>2): Likewise.
+ (rsqrtv8sf2): Likewise.
+ (avx_rsqrtv8sf2): Likewise.
+ (*avx_vmrsqrtv4sf2): Likewise.
+ (<code><mode>3): Likewise.
+ (*avx_<code><mode>3_finite): Likewise.
+ (*avx_<code><mode>3): Likewise.
+ (*avx_vm<code><mode>3): Likewise.
+ (*avx_ieee_smin<mode>3): Likewise.
+ (*avx_ieee_smax<mode>3): Likewise.
+ (avx_addsubv8sf3): Likewise.
+ (avx_addsubv4df3): Likewise.
+ (*avx_addsubv4sf3): Likewise.
+ (*avx_addsubv2df3): Likewise.
+ (avx_h<plusminus_insn>v4df3): Likewise.
+ (avx_h<plusminus_insn>v8sf3): Likewise.
+ (*avx_h<plusminus_insn>v4sf3): Likewise.
+ (*avx_h<plusminus_insn>v2df3): Likewise.
+ (avx_cmpp<avxmodesuffixf2c><mode>3): Likewise.
+ (avx_cmps<ssemodesuffixf2c><mode>3): Likewise.
+ (*avx_maskcmp<mode>3): Likewise.
+ (avx_nand<mode>3): Likewise.
+ (*avx_<code><mode>3): Likewise.
+ (*avx_nand<mode>3): Likewise.
+ (*avx_<code><mode>3): Likewise.
+ (*avx_cvtsi2ss): Likewise.
+ (*avx_cvtsi2ssq): Likewise.
+ (*avx_cvtsi2sd): Likewise.
+ (*avx_cvtsi2sdq): Likewise.
+ (*avx_cvtsd2ss): Likewise.
+ (avx_cvtss2sd): Likewise.
+ (avx_cvtdq2ps<avxmodesuffix>): Likewise.
+ (avx_cvtps2dq<avxmodesuffix>): Likewise.
+ (avx_cvttps2dq<avxmodesuffix>): Likewise.
+ (*avx_cvtsi2sd): Likewise.
+ (*avx_cvtsi2sdq): Likewise.
+ (avx_cvtdq2pd256): Likewise.
+ (avx_cvtpd2dq256): Likewise.
+ (avx_cvttpd2dq256): Likewise.
+ (*avx_cvtsd2ss): Likewise.
+ (*avx_cvtss2sd): Likewise.
+ (avx_cvtpd2ps256): Likewise.
+ (avx_cvtps2pd256): Likewise.
+ (*avx_movhlps): Likewise.
+ (*avx_movlhps): Likewise.
+ (avx_unpckhps256): Likewise.
+ (*avx_unpckhps): Likewise.
+ (avx_unpcklps256): Likewise.
+ (*avx_unpcklps): Likewise.
+ (avx_movshdup256): Likewise.
+ (avx_movsldup256): Likewise.
+ (avx_shufps256): Likewise.
+ (avx_shufps256_1): Likewise.
+ (*avx_shufps_<mode>): Likewise.
+ (*avx_loadhps): Likewise.
+ (*avx_storelps): Likewise.
+ (*avx_loadlps): Likewise.
+ (*avx_movss): Likewise.
+ (*vec_dupv4sf_avx): Likewise.
+ (*vec_concatv2sf_avx): Likewise.
+ (*vec_concatv4sf_avx): Likewise.
+ (*vec_setv4sf_0_avx): Likewise.
+ (*vec_setv4sf_avx): Likewise.
+ (*avx_insertps): Likewise.
+ (avx_vextractf128<mode>): Likewise.
+ (vec_extract_lo_<mode>): Likewise.
+ (vec_extract_hi_<mode>): Likewise.
+ (vec_extract_lo_<mode>): Likewise.
+ (vec_extract_hi_<mode>): Likewise.
+ (vec_extract_lo_v16hi): Likewise.
+ (vec_extract_hi_v16hi): Likewise.
+ (vec_extract_lo_v32qi): Likewise.
+ (vec_extract_hi_v32qi): Likewise.
+ (avx_unpckhpd256): Likewise.
+ (*avx_unpckhpd): Likewise.
+ (avx_movddup256): Likewise.
+ (*avx_movddup): Likewise.
+ (avx_unpcklpd256): Likewise.
+ (*avx_unpcklpd): Likewise.
+ (avx_shufpd256): Likewise.
+ (avx_shufpd256_1): Likewise.
+ (*avx_punpckhqdq): Likewise.
+ (*avx_punpcklqdq): Likewise.
+ (*avx_shufpd_<mode>): Likewise.
+ (*avx_storehpd): Likewise.
+ (*avx_loadhpd): Likewise.
+ (*avx_loadlpd): Likewise.
+ (*avx_movsd): Likewise.
+ (*vec_concatv2df_avx): Likewise.
+ (*avx_<plusminus_insn><mode>3): Likewise.
+ (*avx_<plusminus_insn><mode>3): Likewise.
+ (*avx_mulv8hi3): Likewise.
+ (*avx_smulv8hi3_highpart): Likewise.
+ (*avx_umulv8hi3_highpart): Likewise.
+ (*avx_umulv2siv2di3): Likewise.
+ (*avx_mulv2siv2di3): Likewise.
+ (*avx_pmaddwd): Likewise.
+ (*avx_mulv4si3): Likewise.
+ (*avx_ashr<mode>3): Likewise.
+ (*avx_lshr<mode>3): Likewise.
+ (*avx_ashl<mode>3): Likewise.
+ (*avx_<code><mode>3): Likewise.
+ (*avx_eq<mode>3): Likewise.
+ (*avx_gt<mode>3): Likewise.
+ (*avx_nand<mode>3): Likewise.
+ (*avx_nand<mode>3): Likewise.
+ (*avx_<code><mode>3): Likewise.
+ (*avx_<code><mode>3): Likewise.
+ (*avx_packsswb): Likewise.
+ (*avx_packssdw): Likewise.
+ (*avx_packuswb): Likewise.
+ (*avx_punpckhbw): Likewise.
+ (*avx_punpcklbw): Likewise.
+ (*avx_punpckhwd): Likewise.
+ (*avx_punpcklwd): Likewise.
+ (*avx_punpckhdq): Likewise.
+ (*avx_punpckldq): Likewise.
+ (*avx_pinsr<avxmodesuffixs>): Likewise.
+ (*avx_pinsrq): Likewise.
+ (*avx_loadld): Likewise.
+ (*vec_extractv2di_1_rex64_avx): Likewise.
+ (*vec_extractv2di_1_avx): Likewise.
+ (*vec_dupv2di_avx): Likewise.
+ (*vec_concatv2si_avx): Likewise.
+ (*vec_concatv4si_1_avx): Likewise.
+ (*vec_concatv2di_avx): Likewise.
+ (*vec_concatv2di_rex64_avx): Likewise.
+ (*avx_uavgv16qi3): Likewise.
+ (*avx_uavgv8hi3): Likewise.
+ (*avx_psadbw): Likewise.
+ (avx_movmskp<avxmodesuffixf2c>256): Likewise.
+ (*avx_phaddwv8hi3): Likewise.
+ (*avx_phadddv4si3): Likewise.
+ (*avx_phaddswv8hi3): Likewise.
+ (*avx_phsubwv8hi3): Likewise.
+ (*avx_phsubdv4si3): Likewise.
+ (*avx_phsubswv8hi3): Likewise.
+ (*avx_pmaddubsw128): Likewise.
+ (*avx_pmulhrswv8hi3): Likewise.
+ (*avx_pshufbv16qi3): Likewise.
+ (*avx_psign<mode>3): Likewise.
+ (*avx_palignrti): Likewise.
+ (avx_blendp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (avx_blendvp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (avx_dpp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (*avx_mpsadbw): Likewise.
+ (*avx_packusdw): Likewise.
+ (*avx_pblendvb): Likewise.
+ (*avx_pblendw): Likewise.
+ (avx_vtestp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (avx_ptest256): Likewise.
+ (avx_roundp<avxmodesuffixf2c>256): Likewise.
+ (*avx_rounds<ssemodesuffixf2c>): Likewise.
+ (*avx_aesenc): Likewise.
+ (*avx_aesenclast): Likewise.
+ (*avx_aesdec): Likewise.
+ (*avx_aesdeclast): Likewise.
+ (avx_vzeroupper): Likewise.
+ (avx_vzeroupper_rex64): Likewise.
+ (avx_vpermil<mode>): Likewise.
+ (avx_vpermilvar<mode>3): Likewise.
+ (avx_vpermil2<mode>3): Likewise.
+ (avx_vperm2f128<mode>3): Likewise.
+ (avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (avx_vbroadcastss256): Likewise.
+ (avx_vbroadcastf128_p<avxmodesuffixf2c>256): Likewise.
+ (avx_vinsertf128<mode>): Likewise.
+ (vec_set_lo_<mode>): Likewise.
+ (vec_set_hi_<mode>): Likewise.
+ (vec_set_lo_<mode>): Likewise.
+ (vec_set_hi_<mode>): Likewise.
+ (vec_set_lo_v16hi): Likewise.
+ (vec_set_hi_v16hi): Likewise.
+ (vec_set_lo_v32qi): Likewise.
+ (vec_set_hi_v32qi): Likewise.
+ (avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>): Likewise.
+ (avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>): Likewise.
+ (avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>): Likewise.
+ (vec_init<mode>): Likewise.
+ (*vec_concat<mode>_avx): Likewise.
+ (blendbits): Support V8SF and V4DF.
+ (sse2_movq128): Support AVX.
+ (<sse>_movnt<mode>): Likewise.
+ (sse2_movntv2di): Likewise.
+ (sse_rcpv4sf2): Likewise.
+ (sse_sqrtv4sf2): Likewise.
+ (sse_rsqrtv4sf2): Likewise.
+ (<sse>_comi): Likewise.
+ (<sse>_ucomi): Likewise.
+ (sse_cvtss2si): Likewise.
+ (sse_cvtss2si_2): Likewise.
+ (sse_cvtss2siq): Likewise.
+ (sse_cvtss2siq_2): Likewise.
+ (sse_cvttss2si): Likewise.
+ (sse_cvttss2siq): Likewise.
+ (sse2_cvtsd2si): Likewise.
+ (sse2_cvtsd2si_2): Likewise.
+ (sse2_cvtsd2siq): Likewise.
+ (sse2_cvtsd2siq_2): Likewise.
+ (sse2_cvttsd2si): Likewise.
+ (sse2_cvttsd2siq): Likewise.
+ (sse2_cvtdq2pd): Likewise.
+ (*sse2_cvtpd2dq): Likewise.
+ (*sse2_cvttpd2dq): Likewise.
+ (*sse2_cvtpd2ps): Likewise.
+ (sse2_cvtps2pd): Likewise.
+ (sse3_movshdup): Likewise.
+ (sse3_movsldup): Likewise.
+ (sse_storehps): Likewise.
+ (*sse4_1_extractps): Likewise.
+ (sse2_storelpd): Likewise.
+ (vec_dupv2df_sse3): Likewise.
+ (*vec_concatv2df_sse3): Likewise.
+ (*sse4_1_pextrb): Likewise.
+ (*sse4_1_pextrb_memory): Likewise.
+ (*sse2_pextrw): Likewise.
+ (*sse4_1_pextrw_memory): Likewise.
+ (*sse4_1_pextrd): Likewise.
+ (*sse4_1_pextrq): Likewise.
+ (sse2_pshufd_1): Likewise.
+ (sse2_pshuflw_1): Likewise.
+ (sse2_pshufhw_1): Likewise.
+ (*sse2_storeq_rex64): Likewise.
+ (*vec_dupv4si): Likewise.
+ (<sse>_movmskp<ssemodesuffixf2c>): Likewise.
+ (sse2_pmovmskb): Likewise.
+ (*sse2_maskmovdqu): Likewise.
+ (*sse2_maskmovdqu_rex64): Likewise.
+ (sse_ldmxcsr): Likewise.
+ (sse_stmxcsr): Likewise.
+ (abs<mode>2): Likewise.
+ (sse4_1_movntdqa): Likewise.
+ (sse4_1_phminposuw): Likewise.
+ (sse4_1_extendv8qiv8hi2): Likewise.
+ (*sse4_1_extendv8qiv8hi2): Likewise.
+ (sse4_1_extendv4qiv4si2): Likewise.
+ (*sse4_1_extendv4qiv4si2): Likewise.
+ (sse4_1_extendv2qiv2di2): Likewise.
+ (*sse4_1_extendv2qiv2di2): Likewise.
+ (sse4_1_extendv4hiv4si2): Likewise.
+ (*sse4_1_extendv4hiv4si2): Likewise.
+ (sse4_1_extendv2hiv2di2): Likewise.
+ (*sse4_1_extendv2hiv2di2): Likewise.
+ (sse4_1_extendv2siv2di2): Likewise.
+ (*sse4_1_extendv2siv2di2): Likewise.
+ (sse4_1_zero_extendv8qiv8hi2): Likewise.
+ (*sse4_1_zero_extendv8qiv8hi2): Likewise.
+ (sse4_1_zero_extendv4qiv4si2): Likewise.
+ (*sse4_1_zero_extendv4qiv4si2): Likewise.
+ (sse4_1_zero_extendv2qiv2di2): Likewise.
+ (*sse4_1_zero_extendv2qiv2di2): Likewise.
+ (sse4_1_zero_extendv4hiv4si2): Likewise.
+ (*sse4_1_zero_extendv4hiv4si2): Likewise.
+ (sse4_1_zero_extendv2hiv2di2): Likewise.
+ (*sse4_1_zero_extendv2hiv2di2): Likewise.
+ (sse4_1_zero_extendv2siv2di2): Likewise.
+ (*sse4_1_zero_extendv2siv2di2): Likewise.
+ (sse4_1_ptest): Likewise.
+ (sse4_1_roundp<ssemodesuffixf2c>): Likewise.
+ (sse4_2_pcmpestri): Likewise.
+ (sse4_2_pcmpestrm): Likewise.
+ (sse4_2_pcmpistri): Likewise.
+ (sse4_2_pcmpistrm): Likewise.
+ (aesimc): Likewise.
+ (aeskeygenassist): Likewise.
+
+2008-08-28 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/predicates.md (vzeroall_operation): New.
+
+ * config/i386/sse.md (avx_vzeroall): New.
+ (*avx_vzeroall): Likewise.
+
2008-08-28 Paul Brook <paul@codesourcery.com>
- Mark Shinwell <shinwell@codesourcery.com>
- Richard Earnshaw <richard.earnshaw@arm.com>
+ Mark Shinwell <shinwell@codesourcery.com>
+ Richard Earnshaw <richard.earnshaw@arm.com>
* config/arm/arm.c (TARGET_MAX_ANCHOR_OFFSET): New.
(TARGET_MIN_ANCHOR_OFFSET): New.
2008-08-28 Chris Fairles <chris.fairles@gmail.com>
- * gthr-posix.h (__gthread_create, __gthread_join, __gthread_detach,
+ * gthr-posix.h (__gthread_create, __gthread_join, __gthread_detach,
__gthread_mutex_timed_lock, __gthread_recursive_mutex_timed_lock,
__gthread_cond_signal, __gthread_cond_timedwait,
__gthread_cond_timedwait_recursive): New functions.
2008-08-24 Razya Ladelsky <razya@il.ibm.com>
- PR tree-optimization/37185
- * matrix-reorg.c (transform_access_sites): Update changed stmt.
+ PR tree-optimization/37185
+ * matrix-reorg.c (transform_access_sites): Update changed stmt.
2008-08-23 Jan Hubicka <jh@suse.cz>
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h mmintrin-common.h
- wmmintrin.h cross-stdarg.h"
+ wmmintrin.h gmmintrin.h cross-stdarg.h"
;;
x86_64-*-*)
cpu_type=i386
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h mmintrin-common.h
- wmmintrin.h cross-stdarg.h"
+ wmmintrin.h gmmintrin.h cross-stdarg.h"
need_64bit_hwint=yes
;;
ia64-*-*)
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
#define bit_SSSE3 (1 << 9)
+#define bit_FMA (1 << 12)
#define bit_CMPXCHG16B (1 << 13)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
#define bit_POPCNT (1 << 23)
#define bit_AES (1 << 25)
+#define bit_XSAVE (1 << 26)
+#define bit_OSXSAVE (1 << 27)
+#define bit_AVX (1 << 28)
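+
+/* Example (illustrative): these %ecx bits can be tested with
+   __get_cpuid from this header, e.g.
+   __get_cpuid (1, &eax, &ebx, &ecx, &edx) && (ecx & bit_AVX);
+   OS support (bit_OSXSAVE and an XGETBV check) is a separate test.  */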
/* %edx */
#define bit_CMPXCHG8B (1 << 8)
GAS version 1.38.1 doesn't understand the `repz' opcode mnemonic.
So use `repe' instead. */
+#undef ASM_OUTPUT_OPCODE
#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
{ \
if ((PTR)[0] == 'r' \
(PTR) += 5; \
} \
} \
+ else \
+ ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR)); \
}
/* Define macro used to output shift-double opcodes when the shift
--- /dev/null
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING. If not, write to
+ the Free Software Foundation, 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 11.0. */
+
+#ifndef _GMMINTRIN_H_INCLUDED
+#define _GMMINTRIN_H_INCLUDED
+
+#ifndef __AVX__
+# error "AVX instruction set not enabled"
+#else
+
+/* We need definitions from the SSE4, SSSE3, SSE3, SSE2 and SSE header
+ files. */
+#include <smmintrin.h>
+
+/* Internal data types for implementing the intrinsics. */
+typedef double __v4df __attribute__ ((__vector_size__ (32)));
+typedef float __v8sf __attribute__ ((__vector_size__ (32)));
+typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m256 __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+typedef double __m256d __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+
+/* Compare predicates for scalar and packed compare intrinsics. */
+
+/* Equal (ordered, non-signaling) */
+#define _CMP_EQ_OQ 0x00
+/* Less-than (ordered, signaling) */
+#define _CMP_LT_OS 0x01
+/* Less-than-or-equal (ordered, signaling) */
+#define _CMP_LE_OS 0x02
+/* Unordered (non-signaling) */
+#define _CMP_UNORD_Q 0x03
+/* Not-equal (unordered, non-signaling) */
+#define _CMP_NEQ_UQ 0x04
+/* Not-less-than (unordered, signaling) */
+#define _CMP_NLT_US 0x05
+/* Not-less-than-or-equal (unordered, signaling) */
+#define _CMP_NLE_US 0x06
+/* Ordered (nonsignaling) */
+#define _CMP_ORD_Q 0x07
+/* Equal (unordered, non-signaling) */
+#define _CMP_EQ_UQ 0x08
+/* Not-greater-than-or-equal (unordered, signaling) */
+#define _CMP_NGE_US 0x09
+/* Not-greater-than (unordered, signaling) */
+#define _CMP_NGT_US 0x0a
+/* False (ordered, non-signaling) */
+#define _CMP_FALSE_OQ 0x0b
+/* Not-equal (ordered, non-signaling) */
+#define _CMP_NEQ_OQ 0x0c
+/* Greater-than-or-equal (ordered, signaling) */
+#define _CMP_GE_OS 0x0d
+/* Greater-than (ordered, signaling) */
+#define _CMP_GT_OS 0x0e
+/* True (unordered, non-signaling) */
+#define _CMP_TRUE_UQ 0x0f
+/* Equal (ordered, signaling) */
+#define _CMP_EQ_OS 0x10
+/* Less-than (ordered, non-signaling) */
+#define _CMP_LT_OQ 0x11
+/* Less-than-or-equal (ordered, non-signaling) */
+#define _CMP_LE_OQ 0x12
+/* Unordered (signaling) */
+#define _CMP_UNORD_S 0x13
+/* Not-equal (unordered, signaling) */
+#define _CMP_NEQ_US 0x14
+/* Not-less-than (unordered, non-signaling) */
+#define _CMP_NLT_UQ 0x15
+/* Not-less-than-or-equal (unordered, non-signaling) */
+#define _CMP_NLE_UQ 0x16
+/* Ordered (signaling) */
+#define _CMP_ORD_S 0x17
+/* Equal (unordered, signaling) */
+#define _CMP_EQ_US 0x18
+/* Not-greater-than-or-equal (unordered, non-signaling) */
+#define _CMP_NGE_UQ 0x19
+/* Not-greater-than (unordered, non-signaling) */
+#define _CMP_NGT_UQ 0x1a
+/* False (ordered, signaling) */
+#define _CMP_FALSE_OS 0x1b
+/* Not-equal (ordered, signaling) */
+#define _CMP_NEQ_OS 0x1c
+/* Greater-than-or-equal (ordered, non-signaling) */
+#define _CMP_GE_OQ 0x1d
+/* Greater-than (ordered, non-signaling) */
+#define _CMP_GT_OQ 0x1e
+/* True (unordered, signaling) */
+#define _CMP_TRUE_US 0x1f
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addsub_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addsub_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B);
+}
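+
+/* Note: as with the 128-bit variants, the andnot intrinsics compute
+   (~__A) & __B bitwise.  */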
+
+/* Double/single precision floating point blend instructions - select
+ data from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
+{
+ return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
+ (__v4df)__Y,
+ __M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
+{
+ return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __M);
+}
+#else
+#define _mm256_blend_pd(X, Y, M) \
+ ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(M)))
+
+#define _mm256_blend_ps(X, Y, M) \
+ ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(M)))
+#endif
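+
+/* Example (illustrative): with a constant mask, bit i of M selects
+   element i of the result from Y when set and from X when clear, so
+   _mm256_blend_pd (x, y, 0x5) gives { y[0], x[1], y[2], x[3] }.  */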
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
+{
+ return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X,
+ (__v4df)__Y,
+ (__v4df)__M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
+{
+ return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ (__v8sf)__M);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_divpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_divps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+/* Dot product instructions with mask-defined summing and zeroing parts
+ of result. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
+{
+ return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __M);
+}
+#else
+#define _mm256_dp_ps(X, Y, M) \
+ ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(M)))
+#endif
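+
+/* Example (illustrative): the dot product works on each 128-bit lane
+   separately; with M = 0xff all four products of a lane are summed
+   and the sum is written to every element of that lane.  */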
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_pd (__m256d __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_ps (__m256 __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_pd (__m256d __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_ps (__m256 __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_mulpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_mulps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
+{
+ return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
+ __mask);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
+{
+ return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
+ __mask);
+}
+#else
+#define _mm256_shuffle_pd(A, B, N) \
+ ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(N)))
+
+#define _mm256_shuffle_ps(A, B, N) \
+ ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(N)))
+#endif
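+
+/* Example (illustrative): _mm256_shuffle_pd also works per 128-bit
+   lane; in each lane the even result element is taken from A and the
+   odd one from B, with successive mask bits picking the low or high
+   element of that lane.  */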
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_subpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_subps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
+{
+ return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
+ __P);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
+{
+ return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
+ __P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+#else
+#define _mm_cmp_pd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_ps(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+
+#define _mm256_cmp_pd(X, Y, P) \
+ ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(P)))
+
+#define _mm256_cmp_ps(X, Y, P) \
+ ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(P)))
+
+#define _mm_cmp_sd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_ss(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+#endif
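+
+/* Example (illustrative): the _CMP_* predicates above select the
+   comparison, e.g.
+
+     __m256d m = _mm256_cmp_pd (a, b, _CMP_LT_OS);
+
+   sets each element of m to all-ones where a < b and to zero
+   otherwise, a mask suitable for _mm256_blendv_pd.  */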
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_pd (__m128i __A)
+{
+ return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_ps (__m256i __A)
+{
+ return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_ps (__m256d __A)
+{
+ return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epi32 (__m256 __A)
+{
+ return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_pd (__m128 __A)
+{
+ return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epi32 (__m256d __A)
+{
+ return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epi32 (__m256d __A)
+{
+ return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epi32 (__m256 __A)
+{
+ return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_pd (__m256d __X, const int __N)
+{
+ return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_ps (__m256 __X, const int __N)
+{
+ return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_si256 (__m256i __X, const int __N)
+{
+ return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi32 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
+ return _mm_extract_epi32 (__Y, __N % 4);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi16 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
+ return _mm_extract_epi16 (__Y, __N % 8);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi8 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
+ return _mm_extract_epi8 (__Y, __N % 16);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi64 (__m256i __X, const int __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
+ return _mm_extract_epi64 (__Y, __N % 2);
+}
+#endif
+#else
+#define _mm256_extractf128_pd(X, N) \
+ ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X), \
+ (int)(N)))
+
+#define _mm256_extractf128_ps(X, N) \
+ ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X), \
+ (int)(N)))
+
+#define _mm256_extractf128_si256(X, N) \
+ ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X), \
+ (int)(N)))
+
+#define _mm256_extract_epi32(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
+ _mm_extract_epi32 (__Y, (N) % 4); \
+ }))
+
+#define _mm256_extract_epi16(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
+ _mm_extract_epi16 (__Y, (N) % 8); \
+ }))
+
+#define _mm256_extract_epi8(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
+ _mm_extract_epi8 (__Y, (N) % 16); \
+ }))
+
+#ifdef __x86_64__
+#define _mm256_extract_epi64(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
+ _mm_extract_epi64 (__Y, (N) % 2); \
+ }))
+#endif
+#endif
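+
+/* Example (illustrative): _mm256_extractf128_si256 (x, 0) returns the
+   low and (x, 1) the high 128 bits; the element extractors build on
+   it, e.g. _mm256_extract_epi32 (x, 5) reads element 1 of the upper
+   half.  */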
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zeroall (void)
+{
+ __builtin_ia32_vzeroall ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zeroupper (void)
+{
+ __builtin_ia32_vzeroupper ();
+}
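+
+/* Note: _mm256_zeroall clears all YMM registers, while
+   _mm256_zeroupper clears only their upper 128 bits and is typically
+   used before calling legacy SSE code to avoid AVX/SSE transition
+   penalties.  */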
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutevar_pd (__m128d __A, __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
+ (__v2di)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar_pd (__m256d __A, __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
+ (__v4di)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutevar_ps (__m128 __A, __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
+ (__v4si)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar_ps (__m256 __A, __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
+ (__v8si)__C);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute_pd (__m128d __X, const int __C)
+{
+ return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute_pd (__m256d __X, const int __C)
+{
+ return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute_ps (__m128 __X, const int __C)
+{
+ return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute_ps (__m256 __X, const int __C)
+{
+ return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
+{
+ return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
+ (__v2df)__Y,
+ (__v2di)__C,
+ __I);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
+{
+ return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
+ (__v4df)__Y,
+ (__v4di)__C,
+ __I);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
+{
+ return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
+ (__v4sf)__Y,
+ (__v4si)__C,
+ __I);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
+{
+ return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ (__v8si)__C,
+ __I);
+}
+#else
+#define _mm_permute_pd(X, C) \
+ ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
+
+#define _mm256_permute_pd(X, C) \
+ ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X), (int)(C)))
+
+#define _mm_permute_ps(X, C) \
+ ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))
+
+#define _mm256_permute_ps(X, C) \
+ ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
+
+#define _mm_permute2_pd(X, Y, C, I) \
+ ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128d)(C), \
+ (int)(I)))
+
+#define _mm256_permute2_pd(X, Y, C, I) \
+ ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256d)(C), \
+ (int)(I)))
+
+#define _mm_permute2_ps(X, Y, C, I) \
+ ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128)(C), \
+ (int)(I)))
+
+#define _mm256_permute2_ps(X, Y, C, I) \
+ ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256)(C), \
+ (int)(I)))
+#endif
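+
+/* Example (illustrative): for _mm_permute_pd, bit 0 of C selects the
+   source element for result element 0 and bit 1 for element 1, so
+   _mm_permute_pd (x, 0x1) gives { x[1], x[0] }.  */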
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
+{
+ return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
+ (__v4df)__Y,
+ __C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
+{
+ return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __C);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
+{
+ return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
+ (__v8si)__Y,
+ __C);
+}
+#else
+#define _mm256_permute2f128_pd(X, Y, C) \
+ ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (int)(C)))
+
+#define _mm256_permute2f128_ps(X, Y, C) \
+ ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (int)(C)))
+
+#define _mm256_permute2f128_si256(X, Y, C) \
+ ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), \
+ (int)(C)))
+#endif
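+
+/* Example (illustrative): _mm256_permute2f128_pd builds each 128-bit
+   half of the result from one of the four input halves; C = 0x20
+   concatenates the low halves of X and Y, C = 0x31 the high halves.  */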
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcast_ss (float const *__X)
+{
+ return (__m128) __builtin_ia32_vbroadcastss (__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_sd (double const *__X)
+{
+ return (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_ss (float const *__X)
+{
+ return (__m256) __builtin_ia32_vbroadcastss256 (__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_pd (__m128d const *__X)
+{
+ return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_ps (__m128 const *__X)
+{
+ return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
+}
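+
+/* Example (illustrative): _mm256_broadcast_sd (&d) replicates the
+   scalar d into all four double elements of the result, and
+   _mm256_broadcast_pd (&v) repeats a 128-bit vector in both halves.  */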
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
+{
+ return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
+ (__v2df)__Y,
+ __O);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
+{
+ return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
+ (__v4sf)__Y,
+ __O);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
+{
+ return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
+ (__v4si)__Y,
+ __O);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
+ __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
+ __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
+ __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
+}
+
+#ifdef __x86_64__
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
+ __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
+}
+#endif
+#else
+#define _mm256_insertf128_pd(X, Y, O) \
+ ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_ps(X, Y, O) \
+ ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_si256(X, Y, O) \
+ ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X), \
+ (__v4si)(__m128i)(Y), \
+ (int)(O)))
+
+#define _mm256_insert_epi32(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
+ __Y = _mm_insert_epi32 (__Y, (D), (N) % 4); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 2); \
+ }))
+
+#define _mm256_insert_epi16(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
+ __Y = _mm_insert_epi16 (__Y, (D), (N) % 8); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 3); \
+ }))
+
+#define _mm256_insert_epi8(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
+ __Y = _mm_insert_epi8 (__Y, (D), (N) % 16); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 4); \
+ }))
+
+#ifdef __x86_64__
+#define _mm256_insert_epi64(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
+ __Y = _mm_insert_epi64 (__Y, (D), (N) % 2); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 1); \
+ }))
+#endif
+#endif
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_pd (double const *__P)
+{
+ return *(__m256d *)__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_pd (double *__P, __m256d __A)
+{
+ *(__m256d *)__P = __A;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_ps (float const *__P)
+{
+ return *(__m256 *)__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_ps (float *__P, __m256 __A)
+{
+ *(__m256 *)__P = __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_pd (double const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256 (__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_pd (double *__P, __m256d __A)
+{
+ __builtin_ia32_storeupd256 (__P, (__v4df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_ps (float const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256 (__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_ps (float *__P, __m256 __A)
+{
+ __builtin_ia32_storeups256 (__P, (__v8sf)__A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_si256 (__m256i const *__P)
+{
+ return *__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_si256 (__m256i *__P, __m256i __A)
+{
+ *__P = __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_si256 (__m256i const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqu256 ((char const *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_si256 (__m256i *__P, __m256i __A)
+{
+ __builtin_ia32_storedqu256 ((char *)__P, (__v32qi)__A);
+}
+
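+/* Masked loads and stores: only the elements whose corresponding mask
+ element has its most significant bit set are loaded or stored;
+ masked-off elements of a load are zeroed. */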
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_pd (double const *__P, __m128d __M)
+{
+ return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
+ (__v2df)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_pd (double *__P, __m128d __M, __m128d __A)
+{
+ __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2df)__M, (__v2df)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_pd (double const *__P, __m256d __M)
+{
+ return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
+ (__v4df)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_pd (double *__P, __m256d __M, __m256d __A)
+{
+ __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4df)__M, (__v4df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_ps (float const *__P, __m128 __M)
+{
+ return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
+ (__v4sf)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_ps (float *__P, __m128 __M, __m128 __A)
+{
+ __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4sf)__M, (__v4sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_ps (float const *__P, __m256 __M)
+{
+ return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
+ (__v8sf)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_ps (float *__P, __m256 __M, __m256 __A)
+{
+ __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8sf)__M, (__v8sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movehdup_ps (__m256 __X)
+{
+ return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_moveldup_ps (__m256 __X)
+{
+ return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movedup_pd (__m256d __X)
+{
+ return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_lddqu_si256 (__m256i const *__P)
+{
+ return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_round_pd (__m256d __V, const int __M)
+{
+ return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_round_ps (__m256 __V, const int __M)
+{
+ return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
+}
+#else
+#define _mm256_round_pd(V, M) \
+ ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))
+
+#define _mm256_round_ps(V, M) \
+ ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
+#endif
+
+#define _mm256_ceil_pd(V) _mm256_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm256_floor_pd(V) _mm256_round_pd ((V), _MM_FROUND_FLOOR)
+#define _mm256_ceil_ps(V) _mm256_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm256_floor_ps(V) _mm256_round_ps ((V), _MM_FROUND_FLOOR)
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
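+/* The vtestpd/vtestps/vptest intrinsics below return the flag results
+ of the corresponding instruction: testz returns ZF, testc returns CF,
+ and testnzc is nonzero only when both ZF and CF are clear.  The
+ packed FP forms look only at the sign bit of each element. */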
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_pd (__m256d __A)
+{
+ return __builtin_ia32_movmskpd256 ((__v4df)__A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_ps (__m256 __A)
+{
+ return __builtin_ia32_movmskps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_pd (void)
+{
+ return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_ps (void)
+{
+ return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_si256 (void)
+{
+ return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
+}
+
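+/* The _mm256_set_* intrinsics take their arguments with the
+ highest-indexed element first, so the vector initializers below list
+ the elements in reverse; the _mm256_setr_* variants further down take
+ them in memory order. */
+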
+/* Create the vector [A B C D]. */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_pd (double __A, double __B, double __C, double __D)
+{
+ return __extension__ (__m256d){ __D, __C, __B, __A };
+}
+
+/* Create the vector [A B C D E F G H]. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H)
+{
+ return __extension__ (__m256){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+/* Create the vector [A B C D E F G H]. */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H)
+{
+ return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
+ short __q11, short __q10, short __q09, short __q08,
+ short __q07, short __q06, short __q05, short __q04,
+ short __q03, short __q02, short __q01, short __q00)
+{
+ return __extension__ (__m256i)(__v16hi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
+ };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi8 (char __q31, char __q30, char __q29, char __q28,
+ char __q27, char __q26, char __q25, char __q24,
+ char __q23, char __q22, char __q21, char __q20,
+ char __q19, char __q18, char __q17, char __q16,
+ char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return __extension__ (__m256i)(__v32qi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
+ __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
+ __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
+ };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pd (double __A)
+{
+ return __extension__ (__m256d){ __A, __A, __A, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_ps (float __A)
+{
+ return __extension__ (__m256){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi32 (int __A)
+{
+ return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi16 (short __A)
+{
+ return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi8 (char __A)
+{
+ return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi64x (long long __A)
+{
+ return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
+}
+
+/* Create vectors of elements in the reversed order from the
+ _mm256_set_XXX functions. */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_pd (double __A, double __B, double __C, double __D)
+{
+ return _mm256_set_pd (__D, __C, __B, __A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H)
+{
+ return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H)
+{
+ return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
+ short __q11, short __q10, short __q09, short __q08,
+ short __q07, short __q06, short __q05, short __q04,
+ short __q03, short __q02, short __q01, short __q00)
+{
+ return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
+ __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11,
+ __q12, __q13, __q14, __q15);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi8 (char __q31, char __q30, char __q29, char __q28,
+ char __q27, char __q26, char __q25, char __q24,
+ char __q23, char __q22, char __q21, char __q20,
+ char __q19, char __q18, char __q17, char __q16,
+ char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
+ __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11,
+ __q12, __q13, __q14, __q15,
+ __q16, __q17, __q18, __q19,
+ __q20, __q21, __q22, __q23,
+ __q24, __q25, __q26, __q27,
+ __q28, __q29, __q30, __q31);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return _mm256_set_epi64x (__D, __C, __B, __A);
+}
+
+/* Casts between various SP, DP, INT vector types. Note that these do no
+ conversion of values, they just change the type. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_ps (__m256d __A)
+{
+ return (__m256) __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_si256 (__m256d __A)
+{
+ return (__m256i) __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_pd (__m256 __A)
+{
+ return (__m256d) __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_si256(__m256 __A)
+{
+ return (__m256i) __A;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_ps (__m256i __A)
+{
+ return (__m256) __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_pd (__m256i __A)
+{
+ return (__m256d) __A;
+}
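+/* Casts from a 256-bit type down to a 128-bit type return the low 128
+ bits of the source and should not generate any extra moves. */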
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd256_pd128 (__m256d __A)
+{
+ return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps256_ps128 (__m256 __A)
+{
+ return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_si128 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
+}
+
+/* When a cast is done from a 128-bit to a 256-bit type, the low 128
+ bits of the 256-bit result contain the source parameter value; the
+ upper 128 bits of the result are undefined.  These intrinsics
+ shouldn't generate any extra moves.  */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd128_pd256 (__m128d __A)
+{
+ return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps128_ps256 (__m128 __A)
+{
+ return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi128_si256 (__m128i __A)
+{
+ return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
+}
+
+#endif /* __AVX__ */
+
+#endif /* _GMMINTRIN_H_INCLUDED */
def_or_undef (parse_in, "__AES__");
if (isa_flag & OPTION_MASK_ISA_PCLMUL)
def_or_undef (parse_in, "__PCLMUL__");
+ if (isa_flag & OPTION_MASK_ISA_AVX)
+ def_or_undef (parse_in, "__AVX__");
+ if (isa_flag & OPTION_MASK_ISA_FMA)
+ def_or_undef (parse_in, "__FMA__");
if (isa_flag & OPTION_MASK_ISA_SSE4A)
def_or_undef (parse_in, "__SSE4A__");
if (isa_flag & OPTION_MASK_ISA_SSE5)
VECTOR_MODES (INT, 4); /* V4QI V2HI */
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
VECTOR_MODE (INT, DI, 1); /* V1DI */
VECTOR_MODE (INT, SI, 1); /* V1SI */
VECTOR_MODE (INT, QI, 2); /* V2QI */
-VECTOR_MODE (INT, DI, 4); /* V4DI */
-VECTOR_MODE (INT, SI, 8); /* V8SI */
-VECTOR_MODE (INT, HI, 16); /* V16HI */
-VECTOR_MODE (INT, QI, 32); /* V32QI */
-VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
-VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
+VECTOR_MODE (INT, DI, 8); /* V8DI */
+VECTOR_MODE (INT, HI, 32); /* V32HI */
+VECTOR_MODE (INT, QI, 64); /* V64QI */
+VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
+VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
+
+INT_MODE (OI, 32);
/* The symbol Pmode stands for one of the above machine modes (usually SImode).
The tm.h file specifies which one. It is not a distinct mode. */
extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
extern int ix86_attr_length_immediate_default (rtx, int);
extern int ix86_attr_length_address_default (rtx);
+extern int ix86_attr_length_vex_default (rtx, int, int);
extern enum machine_mode ix86_fp_compare_mode (enum rtx_code);
X86_64_NO_CLASS,
X86_64_INTEGER_CLASS,
X86_64_INTEGERSI_CLASS,
+ X86_64_AVX_CLASS,
X86_64_SSE_CLASS,
X86_64_SSESF_CLASS,
X86_64_SSEDF_CLASS,
(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
(OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
+#define OPTION_MASK_ISA_AVX_SET \
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
+#define OPTION_MASK_ISA_FMA_SET \
+ (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
-#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
+#define OPTION_MASK_ISA_SSE4_2_UNSET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
+#define OPTION_MASK_ISA_AVX_UNSET \
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
+#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
}
return true;
+ case OPT_mavx:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
+ }
+ return true;
+
+ case OPT_mfma:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
+ }
+ return true;
+
case OPT_msse4:
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
PTA_SSE4_2 = 1 << 15,
PTA_SSE5 = 1 << 16,
PTA_AES = 1 << 17,
- PTA_PCLMUL = 1 << 18
+ PTA_PCLMUL = 1 << 18,
+ PTA_AVX = 1 << 19,
+ PTA_FMA = 1 << 20
};
static struct pta
if (processor_alias_table[i].flags & PTA_SSE4_2
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
+ if (processor_alias_table[i].flags & PTA_AVX
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX;
+ if (processor_alias_table[i].flags & PTA_FMA
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA;
if (processor_alias_table[i].flags & PTA_SSE4A
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
}
if (TARGET_MMX)
cum->mmx_nregs = MMX_REGPARM_MAX;
+ cum->warn_avx = true;
cum->warn_sse = true;
cum->warn_mmx = true;
cum->nregs = 0;
cum->sse_nregs = 0;
cum->mmx_nregs = 0;
+ cum->warn_avx = 0;
cum->warn_sse = 0;
cum->warn_mmx = 0;
return;
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
case CTImode:
+ case COImode:
+ case OImode:
return 0;
case SFmode:
if (!(bit_offset % 64))
case TCmode:
/* This modes is larger than 16 bytes. */
return 0;
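+ /* 256bit vector modes are passed in a single AVX register. */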
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ classes[0] = X86_64_AVX_CLASS;
+ return 1;
case V4SFmode:
case V4SImode:
case V16QImode:
case X86_64_INTEGERSI_CLASS:
(*int_nregs)++;
break;
+ case X86_64_AVX_CLASS:
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
return gen_rtx_REG (mode, intreg[0]);
+ case X86_64_AVX_CLASS:
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
break;
/* FALLTHRU */
+ case OImode:
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
case TImode:
case V16QImode:
case V8HImode:
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, HOST_WIDE_INT words)
+ tree type, HOST_WIDE_INT words, int named)
{
int int_nregs, sse_nregs;
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (!named && VALID_AVX256_REG_MODE (mode))
+ return;
+
if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
cum->words += words;
else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- tree type, int named ATTRIBUTE_UNUSED)
+ tree type, int named)
{
HOST_WIDE_INT bytes, words;
if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
function_arg_advance_ms_64 (cum, bytes, words);
else if (TARGET_64BIT)
- function_arg_advance_64 (cum, mode, type, words);
+ function_arg_advance_64 (cum, mode, type, words, named);
else
function_arg_advance_32 (cum, mode, type, bytes, words);
}
enum machine_mode orig_mode, tree type,
HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
- static bool warnedsse, warnedmmx;
+ static bool warnedavx, warnedsse, warnedmmx;
/* Avoid the AL settings for the Unix64 ABI. */
if (mode == VOIDmode)
break;
/* FALLTHRU */
case TImode:
+ /* In 32bit, we pass TImode in xmm registers. */
case V16QImode:
case V8HImode:
case V4SImode:
}
break;
+ case OImode:
+ /* In 32bit, we pass OImode in ymm registers. */
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_AVX && !warnedavx && cum->warn_avx)
+ {
+ warnedavx = true;
+ warning (0, "AVX vector argument without AVX enabled "
+ "changes the ABI");
+ }
+ if (cum->sse_nregs)
+ return gen_reg_or_parallel (mode, orig_mode,
+ cum->sse_regno + FIRST_SSE_REG);
+ }
+ break;
+
case V8QImode:
case V4HImode:
case V2SImode:
static rtx
function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
- enum machine_mode orig_mode, tree type)
+ enum machine_mode orig_mode, tree type, int named)
{
+ static bool warnedavx;
+
/* Handle a hidden AL argument containing number of registers
for varargs x86-64 functions. */
if (mode == VOIDmode)
: cum->sse_regno)
: -1);
+ switch (mode)
+ {
+ default:
+ break;
+
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ /* In 64bit, we pass TImode in integer registers and OImode on
+ stack. */
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_AVX && !warnedavx && cum->warn_avx)
+ {
+ warnedavx = true;
+ warning (0, "AVX vector argument without AVX enabled "
+ "changes the ABI");
+ }
+ }
+
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (!named)
+ return NULL;
+ break;
+ }
+
return construct_container (mode, orig_mode, type, 0, cum->nregs,
cum->sse_nregs,
&x86_64_int_parameter_registers [cum->regno],
if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
return function_arg_ms_64 (cum, mode, omode, named, bytes);
else if (TARGET_64BIT)
- return function_arg_64 (cum, mode, omode, type);
+ return function_arg_64 (cum, mode, omode, type, named);
else
return function_arg_32 (cum, mode, omode, type, bytes, words);
}
label_ref = gen_rtx_LABEL_REF (Pmode, label);
/* Compute address to jump to :
- label - eax*4 + nnamed_sse_arguments*4 */
+ label - eax*4 + nnamed_sse_arguments*4 or
+ label - eax*5 + nnamed_sse_arguments*5 for AVX. */
tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
gen_rtx_MULT (Pmode, nsse_reg,
GEN_INT (4))));
+
+ /* vmovaps is one byte longer than movaps. */
+ if (TARGET_AVX)
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_PLUS (Pmode, tmp_reg,
+ nsse_reg)));
+
if (cum->sse_regno)
emit_move_insn
(nsse_reg,
gen_rtx_CONST (DImode,
gen_rtx_PLUS (DImode,
label_ref,
- GEN_INT (cum->sse_regno * 4))));
+ GEN_INT (cum->sse_regno
+ * (TARGET_AVX ? 5 : 4)))));
else
emit_move_insn (nsse_reg, label_ref);
emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
/* Compute address of memory block we save into. We always use pointer
pointing 127 bytes after first byte to store - this is needed to keep
- instruction size limited by 4 bytes. */
+ instruction size limited to 4 bytes (5 bytes for AVX) with a
+ one-byte displacement. */
tmp_reg = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
plus_constant (save_area,
rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
nat_mode = type_natural_mode (type);
- container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
- X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
- intreg, 0);
+ switch (nat_mode)
+ {
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ /* Unnamed 256bit vector mode parameters are passed on stack. */
+ if (ix86_cfun_abi () == SYSV_ABI)
+ {
+ container = NULL;
+ break;
+ }
+
+ default:
+ container = construct_container (nat_mode, TYPE_MODE (type),
+ type, 0, X86_64_REGPARM_MAX,
+ X86_64_SSE_REGPARM_MAX, intreg,
+ 0);
+ break;
+ }
/* Pull the value out of the saved registers. */
}
}
-/* Return 1 if X is FP constant we can load to SSE register w/o using memory.
- */
+/* Return 1 if X is all 0s.  For all 1s, return 2 if X is in 128bit
+ SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
+ modes and AVX is enabled; return -2 or -3 respectively if the
+ required ISA isn't enabled. */
+
int
standard_sse_constant_p (rtx x)
{
if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
return 1;
- if (vector_all_ones_operand (x, mode)
- && standard_sse_mode_p (mode))
- return TARGET_SSE2 ? 2 : -1;
+ if (vector_all_ones_operand (x, mode))
+ {
+ if (standard_sse_mode_p (mode))
+ return TARGET_SSE2 ? 2 : -2;
+ else if (VALID_AVX256_REG_MODE (mode))
+ return TARGET_AVX ? 3 : -3;
+ }
return 0;
}
switch (standard_sse_constant_p (x))
{
case 1:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
- else if (get_attr_mode (insn) == MODE_V2DF)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ case MODE_V2DF:
+ return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+ case MODE_TI:
+ return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+ case MODE_V8SF:
+ return "vxorps\t%x0, %x0, %x0";
+ case MODE_V4DF:
+ return "vxorpd\t%x0, %x0, %x0";
+ case MODE_OI:
+ return "vpxor\t%x0, %x0, %x0";
+ default:
+ gcc_unreachable ();
+ }
case 2:
- return "pcmpeqd\t%0, %0";
+ if (TARGET_AVX)
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ case MODE_V2DF:
+ case MODE_TI:
+ return "vpcmpeqd\t%0, %0, %0";
+ default:
+ gcc_unreachable ();
+ }
+ else
+ return "pcmpeqd\t%0, %0";
}
gcc_unreachable ();
}
If CODE is 'b', pretend the mode is QImode.
If CODE is 'k', pretend the mode is SImode.
If CODE is 'q', pretend the mode is DImode.
+ If CODE is 'x', pretend the mode is V4SFmode.
+ If CODE is 't', pretend the mode is V8SFmode.
If CODE is 'h', pretend the reg is the 'high' byte register.
- If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
+ If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+ If CODE is 'd', duplicate the operand for an AVX instruction. */
void
print_reg (rtx x, int code, FILE *file)
{
+ const char *reg;
+ bool duplicated = code == 'd' && TARGET_AVX;
+
gcc_assert (x == pc_rtx
|| (REGNO (x) != ARG_POINTER_REGNUM
&& REGNO (x) != FRAME_POINTER_REGNUM
code = 3;
else if (code == 'h')
code = 0;
+ else if (code == 'x')
+ code = 16;
+ else if (code == 't')
+ code = 32;
else
code = GET_MODE_SIZE (GET_MODE (x));
}
return;
}
+
+ reg = NULL;
switch (code)
{
case 3:
if (STACK_TOP_P (x))
{
- fputs ("st(0)", file);
+ reg = "st(0)";
break;
}
/* FALLTHRU */
case 16:
case 2:
normal:
- fputs (hi_reg_name[REGNO (x)], file);
+ reg = hi_reg_name[REGNO (x)];
break;
case 1:
if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
goto normal;
- fputs (qi_reg_name[REGNO (x)], file);
+ reg = qi_reg_name[REGNO (x)];
break;
case 0:
if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
goto normal;
- fputs (qi_high_reg_name[REGNO (x)], file);
+ reg = qi_high_reg_name[REGNO (x)];
+ break;
+ case 32:
+ if (SSE_REG_P (x))
+ {
+ gcc_assert (!duplicated);
+ putc ('y', file);
+ fputs (hi_reg_name[REGNO (x)] + 1, file);
+ return;
+ }
break;
default:
gcc_unreachable ();
}
+
+ fputs (reg, file);
+ if (duplicated)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fprintf (file, ", %%%s", reg);
+ else
+ fprintf (file, ", %s", reg);
+ }
}
/* Locate some local-dynamic symbol still in use by this function
w -- likewise, print the HImode name of the register.
k -- likewise, print the SImode name of the register.
q -- likewise, print the DImode name of the register.
+ x -- likewise, print the V4SFmode name of the register.
+ t -- likewise, print the V8SFmode name of the register.
h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
y -- print "st(0)" instead of "st" as a register.
+ d -- print duplicated register operand for AVX instruction.
D -- print condition for SSE cmp instruction.
P -- if PIC, print an @PLT suffix.
X -- don't print any sort of PIC '@' suffix for a symbol.
gcc_unreachable ();
}
+ case 'd':
case 'b':
case 'w':
case 'k':
case 'q':
case 'h':
+ case 't':
case 'y':
+ case 'x':
case 'X':
case 'P':
break;
/* Little bit of braindamage here. The SSE compare instructions
does use completely different names for the comparisons that the
fp conditional moves. */
- switch (GET_CODE (x))
+ if (TARGET_AVX)
{
- case EQ:
- case UNEQ:
- fputs ("eq", file);
- break;
- case LT:
- case UNLT:
- fputs ("lt", file);
- break;
- case LE:
- case UNLE:
- fputs ("le", file);
- break;
- case UNORDERED:
- fputs ("unord", file);
- break;
- case NE:
- case LTGT:
- fputs ("neq", file);
- break;
- case UNGE:
- case GE:
- fputs ("nlt", file);
- break;
- case UNGT:
- case GT:
- fputs ("nle", file);
- break;
- case ORDERED:
- fputs ("ord", file);
- break;
- default:
- gcc_unreachable ();
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case UNEQ:
+ fputs ("eq_us", file);
+ break;
+ case LT:
+ fputs ("lt", file);
+ break;
+ case UNLT:
+ fputs ("nge", file);
+ break;
+ case LE:
+ fputs ("le", file);
+ break;
+ case UNLE:
+ fputs ("ngt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ fputs ("neq", file);
+ break;
+ case LTGT:
+ fputs ("neq_oq", file);
+ break;
+ case GE:
+ fputs ("ge", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case GT:
+ fputs ("gt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ case UNEQ:
+ fputs ("eq", file);
+ break;
+ case LT:
+ case UNLT:
+ fputs ("lt", file);
+ break;
+ case LE:
+ case UNLE:
+ fputs ("le", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ case LTGT:
+ fputs ("neq", file);
+ break;
+ case UNGE:
+ case GE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ case GT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
}
return;
case 'O':
const char *
output_387_binary_op (rtx insn, rtx *operands)
{
- static char buf[30];
+ static char buf[40];
const char *p;
const char *ssep;
int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
p = "fiadd";
else
p = "fadd";
- ssep = "add";
+ ssep = "vadd";
break;
case MINUS:
p = "fisub";
else
p = "fsub";
- ssep = "sub";
+ ssep = "vsub";
break;
case MULT:
p = "fimul";
else
p = "fmul";
- ssep = "mul";
+ ssep = "vmul";
break;
case DIV:
p = "fidiv";
else
p = "fdiv";
- ssep = "div";
+ ssep = "vdiv";
break;
default:
if (is_sse)
{
- strcpy (buf, ssep);
- if (GET_MODE (operands[0]) == SFmode)
- strcat (buf, "ss\t{%2, %0|%0, %2}");
- else
- strcat (buf, "sd\t{%2, %0|%0, %2}");
+ if (TARGET_AVX)
+ {
+ strcpy (buf, ssep);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
+ else
+ strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
+ }
+ else
+ {
+ strcpy (buf, ssep + 1);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %0|%0, %2}");
+ else
+ strcat (buf, "sd\t{%2, %0|%0, %2}");
+ }
return buf;
}
strcpy (buf, p);
if (is_sse)
{
+ static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
+ static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
+ static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
+ static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
+
if (GET_MODE (operands[0]) == SFmode)
if (unordered_p)
- return "ucomiss\t{%1, %0|%0, %1}";
+ return &ucomiss[TARGET_AVX ? 0 : 1];
else
- return "comiss\t{%1, %0|%0, %1}";
+ return &comiss[TARGET_AVX ? 0 : 1];
else
if (unordered_p)
- return "ucomisd\t{%1, %0|%0, %1}";
+ return &ucomisd[TARGET_AVX ? 0 : 1];
else
- return "comisd\t{%1, %0|%0, %1}";
+ return &comisd[TARGET_AVX ? 0 : 1];
}
gcc_assert (STACK_TOP_P (cmp_op0));
op0 = operands[0];
op1 = operands[1];
+ if (TARGET_AVX)
+ {
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_VECTOR_INT:
+ case MODE_INT:
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 16:
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_avx_movdqu (op0, op1));
+ break;
+ case 32:
+ op0 = gen_lowpart (V32QImode, op0);
+ op1 = gen_lowpart (V32QImode, op1);
+ emit_insn (gen_avx_movdqu256 (op0, op1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case MODE_VECTOR_FLOAT:
+ op0 = gen_lowpart (mode, op0);
+ op1 = gen_lowpart (mode, op1);
+
+ switch (mode)
+ {
+ case V4SFmode:
+ emit_insn (gen_avx_movups (op0, op1));
+ break;
+ case V8SFmode:
+ emit_insn (gen_avx_movups256 (op0, op1));
+ break;
+ case V2DFmode:
+ emit_insn (gen_avx_movupd (op0, op1));
+ break;
+ case V4DFmode:
+ emit_insn (gen_avx_movupd256 (op0, op1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+ }
+
if (MEM_P (op1))
{
/* If we're optimizing for size, movups is the smallest. */
}
return 0;
}
+
+/* Compute default value for "length_vex" attribute. It includes
+ 2 or 3 byte VEX prefix and 1 opcode byte. */
+
+int
+ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
+ int has_vex_w)
+{
+ int i;
+
+ /* Only the 0f opcode can use the 2 byte VEX prefix, and the VEX.W
+ bit requires the 3 byte VEX prefix. */
+ if (!has_0f_opcode || has_vex_w)
+ return 3 + 1;
+
+ /* We can always use 2 byte VEX prefix in 32bit. */
+ if (!TARGET_64BIT)
+ return 2 + 1;
+
+ extract_insn_cached (insn);
+
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (REG_P (recog_data.operand[i]))
+ {
+ /* REX.W bit uses 3 byte VEX prefix. */
+ if (GET_MODE (recog_data.operand[i]) == DImode)
+ return 3 + 1;
+ }
+ else
+ {
+ /* REX.X or REX.B bits use 3 byte VEX prefix. */
+ if (MEM_P (recog_data.operand[i])
+ && x86_extended_reg_mentioned_p (recog_data.operand[i]))
+ return 3 + 1;
+ }
+
+ return 2 + 1;
+}
\f
/* Return the maximum number of instructions a cpu can issue. */
/* PCLMUL instruction */
IX86_BUILTIN_PCLMULQDQ128,
+ /* AVX */
+ IX86_BUILTIN_ADDPD256,
+ IX86_BUILTIN_ADDPS256,
+ IX86_BUILTIN_ADDSUBPD256,
+ IX86_BUILTIN_ADDSUBPS256,
+ IX86_BUILTIN_ANDPD256,
+ IX86_BUILTIN_ANDPS256,
+ IX86_BUILTIN_ANDNPD256,
+ IX86_BUILTIN_ANDNPS256,
+ IX86_BUILTIN_BLENDPD256,
+ IX86_BUILTIN_BLENDPS256,
+ IX86_BUILTIN_BLENDVPD256,
+ IX86_BUILTIN_BLENDVPS256,
+ IX86_BUILTIN_DIVPD256,
+ IX86_BUILTIN_DIVPS256,
+ IX86_BUILTIN_DPPS256,
+ IX86_BUILTIN_HADDPD256,
+ IX86_BUILTIN_HADDPS256,
+ IX86_BUILTIN_HSUBPD256,
+ IX86_BUILTIN_HSUBPS256,
+ IX86_BUILTIN_MAXPD256,
+ IX86_BUILTIN_MAXPS256,
+ IX86_BUILTIN_MINPD256,
+ IX86_BUILTIN_MINPS256,
+ IX86_BUILTIN_MULPD256,
+ IX86_BUILTIN_MULPS256,
+ IX86_BUILTIN_ORPD256,
+ IX86_BUILTIN_ORPS256,
+ IX86_BUILTIN_SHUFPD256,
+ IX86_BUILTIN_SHUFPS256,
+ IX86_BUILTIN_SUBPD256,
+ IX86_BUILTIN_SUBPS256,
+ IX86_BUILTIN_XORPD256,
+ IX86_BUILTIN_XORPS256,
+ IX86_BUILTIN_CMPSD,
+ IX86_BUILTIN_CMPSS,
+ IX86_BUILTIN_CMPPD,
+ IX86_BUILTIN_CMPPS,
+ IX86_BUILTIN_CMPPD256,
+ IX86_BUILTIN_CMPPS256,
+ IX86_BUILTIN_CVTDQ2PD256,
+ IX86_BUILTIN_CVTDQ2PS256,
+ IX86_BUILTIN_CVTPD2PS256,
+ IX86_BUILTIN_CVTPS2DQ256,
+ IX86_BUILTIN_CVTPS2PD256,
+ IX86_BUILTIN_CVTTPD2DQ256,
+ IX86_BUILTIN_CVTPD2DQ256,
+ IX86_BUILTIN_CVTTPS2DQ256,
+ IX86_BUILTIN_EXTRACTF128PD256,
+ IX86_BUILTIN_EXTRACTF128PS256,
+ IX86_BUILTIN_EXTRACTF128SI256,
+ IX86_BUILTIN_VZEROALL,
+ IX86_BUILTIN_VZEROUPPER,
+ IX86_BUILTIN_VZEROUPPER_REX64,
+ IX86_BUILTIN_VPERMILVARPD,
+ IX86_BUILTIN_VPERMILVARPS,
+ IX86_BUILTIN_VPERMILVARPD256,
+ IX86_BUILTIN_VPERMILVARPS256,
+ IX86_BUILTIN_VPERMILPD,
+ IX86_BUILTIN_VPERMILPS,
+ IX86_BUILTIN_VPERMILPD256,
+ IX86_BUILTIN_VPERMILPS256,
+ IX86_BUILTIN_VPERMIL2PD,
+ IX86_BUILTIN_VPERMIL2PS,
+ IX86_BUILTIN_VPERMIL2PD256,
+ IX86_BUILTIN_VPERMIL2PS256,
+ IX86_BUILTIN_VPERM2F128PD256,
+ IX86_BUILTIN_VPERM2F128PS256,
+ IX86_BUILTIN_VPERM2F128SI256,
+ IX86_BUILTIN_VBROADCASTSS,
+ IX86_BUILTIN_VBROADCASTSD256,
+ IX86_BUILTIN_VBROADCASTSS256,
+ IX86_BUILTIN_VBROADCASTPD256,
+ IX86_BUILTIN_VBROADCASTPS256,
+ IX86_BUILTIN_VINSERTF128PD256,
+ IX86_BUILTIN_VINSERTF128PS256,
+ IX86_BUILTIN_VINSERTF128SI256,
+ IX86_BUILTIN_LOADUPD256,
+ IX86_BUILTIN_LOADUPS256,
+ IX86_BUILTIN_STOREUPD256,
+ IX86_BUILTIN_STOREUPS256,
+ IX86_BUILTIN_LDDQU256,
+ IX86_BUILTIN_LOADDQU256,
+ IX86_BUILTIN_STOREDQU256,
+ IX86_BUILTIN_MASKLOADPD,
+ IX86_BUILTIN_MASKLOADPS,
+ IX86_BUILTIN_MASKSTOREPD,
+ IX86_BUILTIN_MASKSTOREPS,
+ IX86_BUILTIN_MASKLOADPD256,
+ IX86_BUILTIN_MASKLOADPS256,
+ IX86_BUILTIN_MASKSTOREPD256,
+ IX86_BUILTIN_MASKSTOREPS256,
+ IX86_BUILTIN_MOVSHDUP256,
+ IX86_BUILTIN_MOVSLDUP256,
+ IX86_BUILTIN_MOVDDUP256,
+
+ IX86_BUILTIN_SQRTPD256,
+ IX86_BUILTIN_SQRTPS256,
+ IX86_BUILTIN_SQRTPS_NR256,
+ IX86_BUILTIN_RSQRTPS256,
+ IX86_BUILTIN_RSQRTPS_NR256,
+
+ IX86_BUILTIN_RCPPS256,
+
+ IX86_BUILTIN_ROUNDPD256,
+ IX86_BUILTIN_ROUNDPS256,
+
+ IX86_BUILTIN_UNPCKHPD256,
+ IX86_BUILTIN_UNPCKLPD256,
+ IX86_BUILTIN_UNPCKHPS256,
+ IX86_BUILTIN_UNPCKLPS256,
+
+ IX86_BUILTIN_SI256_SI,
+ IX86_BUILTIN_PS256_PS,
+ IX86_BUILTIN_PD256_PD,
+ IX86_BUILTIN_SI_SI256,
+ IX86_BUILTIN_PS_PS256,
+ IX86_BUILTIN_PD_PD256,
+
+ IX86_BUILTIN_VTESTZPD,
+ IX86_BUILTIN_VTESTCPD,
+ IX86_BUILTIN_VTESTNZCPD,
+ IX86_BUILTIN_VTESTZPS,
+ IX86_BUILTIN_VTESTCPS,
+ IX86_BUILTIN_VTESTNZCPS,
+ IX86_BUILTIN_VTESTZPD256,
+ IX86_BUILTIN_VTESTCPD256,
+ IX86_BUILTIN_VTESTNZCPD256,
+ IX86_BUILTIN_VTESTZPS256,
+ IX86_BUILTIN_VTESTCPS256,
+ IX86_BUILTIN_VTESTNZCPS256,
+ IX86_BUILTIN_PTESTZ256,
+ IX86_BUILTIN_PTESTC256,
+ IX86_BUILTIN_PTESTNZC256,
+
+ IX86_BUILTIN_MOVMSKPD256,
+ IX86_BUILTIN_MOVMSKPS256,
+
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
IX86_BUILTIN_FABSQ,
{
SPECIAL_FTYPE_UNKNOWN,
VOID_FTYPE_VOID,
+ V32QI_FTYPE_PCCHAR,
V16QI_FTYPE_PCCHAR,
+ V8SF_FTYPE_PCV4SF,
+ V8SF_FTYPE_PCFLOAT,
+ V4DF_FTYPE_PCV2DF,
+ V4DF_FTYPE_PCDOUBLE,
V4SF_FTYPE_PCFLOAT,
V2DF_FTYPE_PCDOUBLE,
+ V8SF_FTYPE_PCV8SF_V8SF,
+ V4DF_FTYPE_PCV4DF_V4DF,
V4SF_FTYPE_V4SF_PCV2SF,
+ V4SF_FTYPE_PCV4SF_V4SF,
V2DF_FTYPE_V2DF_PCDOUBLE,
+ V2DF_FTYPE_PCV2DF_V2DF,
V2DI_FTYPE_PV2DI,
VOID_FTYPE_PV2SF_V4SF,
VOID_FTYPE_PV2DI_V2DI,
+ VOID_FTYPE_PCHAR_V32QI,
VOID_FTYPE_PCHAR_V16QI,
+ VOID_FTYPE_PFLOAT_V8SF,
VOID_FTYPE_PFLOAT_V4SF,
+ VOID_FTYPE_PDOUBLE_V4DF,
VOID_FTYPE_PDOUBLE_V2DF,
VOID_FTYPE_PDI_DI,
- VOID_FTYPE_PINT_INT
+ VOID_FTYPE_PINT_INT,
+ VOID_FTYPE_PV8SF_V8SF_V8SF,
+ VOID_FTYPE_PV4DF_V4DF_V4DF,
+ VOID_FTYPE_PV4SF_V4SF_V4SF,
+ VOID_FTYPE_PV2DF_V2DF_V2DF
};
/* Builtin types */
FLOAT128_FTYPE_FLOAT128,
FLOAT_FTYPE_FLOAT,
FLOAT128_FTYPE_FLOAT128_FLOAT128,
+ INT_FTYPE_V8SF_V8SF_PTEST,
+ INT_FTYPE_V4DI_V4DI_PTEST,
+ INT_FTYPE_V4DF_V4DF_PTEST,
+ INT_FTYPE_V4SF_V4SF_PTEST,
INT_FTYPE_V2DI_V2DI_PTEST,
+ INT_FTYPE_V2DF_V2DF_PTEST,
INT64_FTYPE_V4SF,
INT64_FTYPE_V2DF,
INT_FTYPE_V16QI,
INT_FTYPE_V8QI,
+ INT_FTYPE_V8SF,
+ INT_FTYPE_V4DF,
INT_FTYPE_V4SF,
INT_FTYPE_V2DF,
V16QI_FTYPE_V16QI,
+ V8SI_FTYPE_V8SF,
+ V8SI_FTYPE_V4SI,
V8HI_FTYPE_V8HI,
V8HI_FTYPE_V16QI,
V8QI_FTYPE_V8QI,
+ V8SF_FTYPE_V8SF,
+ V8SF_FTYPE_V8SI,
+ V8SF_FTYPE_V4SF,
V4SI_FTYPE_V4SI,
V4SI_FTYPE_V16QI,
+ V4SI_FTYPE_V8SI,
V4SI_FTYPE_V8HI,
+ V4SI_FTYPE_V4DF,
V4SI_FTYPE_V4SF,
V4SI_FTYPE_V2DF,
V4HI_FTYPE_V4HI,
+ V4DF_FTYPE_V4DF,
+ V4DF_FTYPE_V4SI,
+ V4DF_FTYPE_V4SF,
+ V4DF_FTYPE_V2DF,
+ V4SF_FTYPE_V4DF,
V4SF_FTYPE_V4SF,
V4SF_FTYPE_V4SF_VEC_MERGE,
+ V4SF_FTYPE_V8SF,
V4SF_FTYPE_V4SI,
V4SF_FTYPE_V2DF,
V2DI_FTYPE_V2DI,
V2DF_FTYPE_V2DF,
V2DF_FTYPE_V2DF_VEC_MERGE,
V2DF_FTYPE_V4SI,
+ V2DF_FTYPE_V4DF,
V2DF_FTYPE_V4SF,
V2DF_FTYPE_V2SI,
V2SI_FTYPE_V2SI,
V8HI_FTYPE_V16QI_V16QI,
V8HI_FTYPE_V4SI_V4SI,
V8HI_FTYPE_V8HI_SI_COUNT,
+ V8SF_FTYPE_V8SF_V8SF,
+ V8SF_FTYPE_V8SF_V8SI,
V4SI_FTYPE_V4SI_V4SI,
V4SI_FTYPE_V4SI_V4SI_COUNT,
V4SI_FTYPE_V8HI_V8HI,
V4HI_FTYPE_V8QI_V8QI,
V4HI_FTYPE_V2SI_V2SI,
V4HI_FTYPE_V4HI_SI_COUNT,
+ V4DF_FTYPE_V4DF_V4DF,
+ V4DF_FTYPE_V4DF_V4DI,
V4SF_FTYPE_V4SF_V4SF,
V4SF_FTYPE_V4SF_V4SF_SWAP,
+ V4SF_FTYPE_V4SF_V4SI,
V4SF_FTYPE_V4SF_V2SI,
V4SF_FTYPE_V4SF_V2DF,
V4SF_FTYPE_V4SF_DI,
V2DF_FTYPE_V2DF_V2DF,
V2DF_FTYPE_V2DF_V2DF_SWAP,
V2DF_FTYPE_V2DF_V4SF,
+ V2DF_FTYPE_V2DF_V2DI,
V2DF_FTYPE_V2DF_DI,
V2DF_FTYPE_V2DF_SI,
V2SF_FTYPE_V2SF_V2SF,
V8HI_FTYPE_V8HI_INT,
V4SI_FTYPE_V4SI_INT,
V4HI_FTYPE_V4HI_INT,
+ V8SF_FTYPE_V8SF_INT,
+ V4SI_FTYPE_V8SI_INT,
+ V4SF_FTYPE_V8SF_INT,
+ V2DF_FTYPE_V4DF_INT,
+ V4DF_FTYPE_V4DF_INT,
V4SF_FTYPE_V4SF_INT,
V2DI_FTYPE_V2DI_INT,
V2DI2TI_FTYPE_V2DI_INT,
V2DF_FTYPE_V2DF_INT,
V16QI_FTYPE_V16QI_V16QI_V16QI,
+ V8SF_FTYPE_V8SF_V8SF_V8SF,
+ V4DF_FTYPE_V4DF_V4DF_V4DF,
V4SF_FTYPE_V4SF_V4SF_V4SF,
V2DF_FTYPE_V2DF_V2DF_V2DF,
V16QI_FTYPE_V16QI_V16QI_INT,
+ V8SI_FTYPE_V8SI_V8SI_INT,
+ V8SI_FTYPE_V8SI_V4SI_INT,
V8HI_FTYPE_V8HI_V8HI_INT,
+ V8SF_FTYPE_V8SF_V8SF_INT,
+ V8SF_FTYPE_V8SF_V4SF_INT,
V4SI_FTYPE_V4SI_V4SI_INT,
+ V4DF_FTYPE_V4DF_V4DF_INT,
+ V4DF_FTYPE_V4DF_V2DF_INT,
V4SF_FTYPE_V4SF_V4SF_INT,
V2DI_FTYPE_V2DI_V2DI_INT,
V2DI2TI_FTYPE_V2DI_V2DI_INT,
V1DI2DI_FTYPE_V1DI_V1DI_INT,
V2DF_FTYPE_V2DF_V2DF_INT,
+ V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
+ V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
+ V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
+ V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
V2DI_FTYPE_V2DI_UINT_UINT,
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+
+ /* AVX */
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
};
/* Builtins with variable number of arguments. */
/* PCLMUL */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
+
+ /* AVX */
+ { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
};
/* SSE5 */
float_type_node,
NULL_TREE);
+ /* AVX builtins */
+ tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
+ V32QImode);
+ tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
+ V8SImode);
+ tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
+ V8SFmode);
+ tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
+ V4DImode);
+ tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
+ V4DFmode);
+ tree v8sf_ftype_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8sf
+ = build_function_type_list (V8SI_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8si
+ = build_function_type_list (V8SF_type_node,
+ V8SI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v4df
+ = build_function_type_list (V4SI_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4si
+ = build_function_type_list (V4DF_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4sf
+ = build_function_type_list (V4DF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4df
+ = build_function_type_list (V4SF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8si_int
+ = build_function_type_list (V4SI_type_node,
+ V8SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v8sf_int
+ = build_function_type_list (V4SF_type_node,
+ V8SF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v4df_int
+ = build_function_type_list (V2DF_type_node,
+ V4DF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8si_v8si_int
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node, V8SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf_v8si_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ V8SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df_v4di_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ V4DI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v4sf_v4si_int
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node,
+ V4SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v2df_v2df_v2di_int
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node,
+ V2DI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_pcfloat
+ = build_function_type_list (V8SF_type_node,
+ pcfloat_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcdouble
+ = build_function_type_list (V4DF_type_node,
+ pcdouble_type_node,
+ NULL_TREE);
+ tree pcv4sf_type_node
+ = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
+ tree pcv2df_type_node
+ = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
+ tree v8sf_ftype_pcv4sf
+ = build_function_type_list (V8SF_type_node,
+ pcv4sf_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcv2df
+ = build_function_type_list (V4DF_type_node,
+ pcv2df_type_node,
+ NULL_TREE);
+ tree v32qi_ftype_pcchar
+ = build_function_type_list (V32QI_type_node,
+ pcchar_type_node,
+ NULL_TREE);
+ tree void_ftype_pchar_v32qi
+ = build_function_type_list (void_type_node,
+ pchar_type_node, V32QI_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8si_v4si_int
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node, V4SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v4sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V4SF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v2df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V2DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree void_ftype_pfloat_v8sf
+ = build_function_type_list (void_type_node,
+ pfloat_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pdouble_v4df
+ = build_function_type_list (void_type_node,
+ pdouble_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
+ tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
+ tree pv4df_type_node = build_pointer_type (V4DF_type_node);
+ tree pv2df_type_node = build_pointer_type (V2DF_type_node);
+ tree pcv8sf_type_node
+ = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
+ tree pcv4df_type_node
+ = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
+ tree v8sf_ftype_pcv8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ pcv8sf_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcv4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ pcv4df_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_pcv4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ pcv4sf_type_node, V4SF_type_node,
+ NULL_TREE);
+ tree v2df_ftype_pcv2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ pcv2df_type_node, V2DF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv8sf_v8sf_v8sf
+ = build_function_type_list (void_type_node,
+ pv8sf_type_node, V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv4df_v4df_v4df
+ = build_function_type_list (void_type_node,
+ pv4df_type_node, V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv4sf_v4sf_v4sf
+ = build_function_type_list (void_type_node,
+ pv4sf_type_node, V4SF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv2df_v2df_v2df
+ = build_function_type_list (void_type_node,
+ pv2df_type_node, V2DF_type_node,
+ V2DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v2df
+ = build_function_type_list (V4DF_type_node,
+ V2DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v4sf
+ = build_function_type_list (V8SF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v4si
+ = build_function_type_list (V8SI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v4df
+ = build_function_type_list (V2DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v8sf
+ = build_function_type_list (V4SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8si
+ = build_function_type_list (V4SI_type_node,
+ V8SI_type_node,
+ NULL_TREE);
+ tree int_ftype_v4df
+ = build_function_type_list (integer_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree int_ftype_v8sf
+ = build_function_type_list (integer_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree int_ftype_v8sf_v8sf
+ = build_function_type_list (integer_type_node,
+ V8SF_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree int_ftype_v4di_v4di
+ = build_function_type_list (integer_type_node,
+ V4DI_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree int_ftype_v4df_v4df
+ = build_function_type_list (integer_type_node,
+ V4DF_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8si
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SI_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4di
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v4si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SI_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2di
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DI_type_node, NULL_TREE);
+
tree ftype;
/* Add all special builtins with variable number of operands. */
case VOID_FTYPE_VOID:
type = void_ftype_void;
break;
+ case V32QI_FTYPE_PCCHAR:
+ type = v32qi_ftype_pcchar;
+ break;
case V16QI_FTYPE_PCCHAR:
type = v16qi_ftype_pcchar;
break;
+ case V8SF_FTYPE_PCV4SF:
+ type = v8sf_ftype_pcv4sf;
+ break;
+ case V8SF_FTYPE_PCFLOAT:
+ type = v8sf_ftype_pcfloat;
+ break;
+ case V4DF_FTYPE_PCV2DF:
+ type = v4df_ftype_pcv2df;
+ break;
+ case V4DF_FTYPE_PCDOUBLE:
+ type = v4df_ftype_pcdouble;
+ break;
case V4SF_FTYPE_PCFLOAT:
type = v4sf_ftype_pcfloat;
break;
case V2DF_FTYPE_PCDOUBLE:
type = v2df_ftype_pcdouble;
break;
+ case V8SF_FTYPE_PCV8SF_V8SF:
+ type = v8sf_ftype_pcv8sf_v8sf;
+ break;
+ case V4DF_FTYPE_PCV4DF_V4DF:
+ type = v4df_ftype_pcv4df_v4df;
+ break;
case V4SF_FTYPE_V4SF_PCV2SF:
type = v4sf_ftype_v4sf_pcv2sf;
break;
+ case V4SF_FTYPE_PCV4SF_V4SF:
+ type = v4sf_ftype_pcv4sf_v4sf;
+ break;
case V2DF_FTYPE_V2DF_PCDOUBLE:
type = v2df_ftype_v2df_pcdouble;
break;
+ case V2DF_FTYPE_PCV2DF_V2DF:
+ type = v2df_ftype_pcv2df_v2df;
+ break;
case VOID_FTYPE_PV2SF_V4SF:
type = void_ftype_pv2sf_v4sf;
break;
case VOID_FTYPE_PV2DI_V2DI:
type = void_ftype_pv2di_v2di;
break;
+ case VOID_FTYPE_PCHAR_V32QI:
+ type = void_ftype_pchar_v32qi;
+ break;
case VOID_FTYPE_PCHAR_V16QI:
type = void_ftype_pchar_v16qi;
break;
+ case VOID_FTYPE_PFLOAT_V8SF:
+ type = void_ftype_pfloat_v8sf;
+ break;
case VOID_FTYPE_PFLOAT_V4SF:
type = void_ftype_pfloat_v4sf;
break;
+ case VOID_FTYPE_PDOUBLE_V4DF:
+ type = void_ftype_pdouble_v4df;
+ break;
case VOID_FTYPE_PDOUBLE_V2DF:
type = void_ftype_pdouble_v2df;
break;
case VOID_FTYPE_PINT_INT:
type = void_ftype_pint_int;
break;
+ case VOID_FTYPE_PV8SF_V8SF_V8SF:
+ type = void_ftype_pv8sf_v8sf_v8sf;
+ break;
+ case VOID_FTYPE_PV4DF_V4DF_V4DF:
+ type = void_ftype_pv4df_v4df_v4df;
+ break;
+ case VOID_FTYPE_PV4SF_V4SF_V4SF:
+ type = void_ftype_pv4sf_v4sf_v4sf;
+ break;
+ case VOID_FTYPE_PV2DF_V2DF_V2DF:
+ type = void_ftype_pv2df_v2df_v2df;
+ break;
default:
gcc_unreachable ();
}
case FLOAT_FTYPE_FLOAT:
type = float_ftype_float;
break;
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ type = int_ftype_v8sf_v8sf;
+ break;
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ type = int_ftype_v4di_v4di;
+ break;
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ type = int_ftype_v4df_v4df;
+ break;
+ case INT_FTYPE_V4SF_V4SF_PTEST:
+ type = int_ftype_v4sf_v4sf;
+ break;
case INT_FTYPE_V2DI_V2DI_PTEST:
type = int_ftype_v2di_v2di;
break;
+ case INT_FTYPE_V2DF_V2DF_PTEST:
+ type = int_ftype_v2df_v2df;
+ break;
case INT64_FTYPE_V4SF:
type = int64_ftype_v4sf;
break;
case INT_FTYPE_V8QI:
type = int_ftype_v8qi;
break;
+ case INT_FTYPE_V8SF:
+ type = int_ftype_v8sf;
+ break;
+ case INT_FTYPE_V4DF:
+ type = int_ftype_v4df;
+ break;
case INT_FTYPE_V4SF:
type = int_ftype_v4sf;
break;
case V16QI_FTYPE_V16QI:
type = v16qi_ftype_v16qi;
break;
+ case V8SI_FTYPE_V8SF:
+ type = v8si_ftype_v8sf;
+ break;
+ case V8SI_FTYPE_V4SI:
+ type = v8si_ftype_v4si;
+ break;
case V8HI_FTYPE_V8HI:
type = v8hi_ftype_v8hi;
break;
case V8QI_FTYPE_V8QI:
type = v8qi_ftype_v8qi;
break;
+ case V8SF_FTYPE_V8SF:
+ type = v8sf_ftype_v8sf;
+ break;
+ case V8SF_FTYPE_V8SI:
+ type = v8sf_ftype_v8si;
+ break;
+ case V8SF_FTYPE_V4SF:
+ type = v8sf_ftype_v4sf;
+ break;
+ case V4SI_FTYPE_V4DF:
+ type = v4si_ftype_v4df;
+ break;
case V4SI_FTYPE_V4SI:
type = v4si_ftype_v4si;
break;
case V4SI_FTYPE_V16QI:
type = v4si_ftype_v16qi;
break;
+ case V4SI_FTYPE_V8SI:
+ type = v4si_ftype_v8si;
+ break;
case V4SI_FTYPE_V8HI:
type = v4si_ftype_v8hi;
break;
case V4HI_FTYPE_V4HI:
type = v4hi_ftype_v4hi;
break;
+ case V4DF_FTYPE_V4DF:
+ type = v4df_ftype_v4df;
+ break;
+ case V4DF_FTYPE_V4SI:
+ type = v4df_ftype_v4si;
+ break;
+ case V4DF_FTYPE_V4SF:
+ type = v4df_ftype_v4sf;
+ break;
+ case V4DF_FTYPE_V2DF:
+ type = v4df_ftype_v2df;
+ break;
case V4SF_FTYPE_V4SF:
case V4SF_FTYPE_V4SF_VEC_MERGE:
type = v4sf_ftype_v4sf;
break;
+ case V4SF_FTYPE_V8SF:
+ type = v4sf_ftype_v8sf;
+ break;
case V4SF_FTYPE_V4SI:
type = v4sf_ftype_v4si;
break;
+ case V4SF_FTYPE_V4DF:
+ type = v4sf_ftype_v4df;
+ break;
case V4SF_FTYPE_V2DF:
type = v4sf_ftype_v2df;
break;
case V2SI_FTYPE_V2SF:
type = v2si_ftype_v2sf;
break;
+ case V2DF_FTYPE_V4DF:
+ type = v2df_ftype_v4df;
+ break;
case V2DF_FTYPE_V4SF:
type = v2df_ftype_v4sf;
break;
case V8HI_FTYPE_V8HI_SI_COUNT:
type = v8hi_ftype_v8hi_int;
break;
+ case V8SF_FTYPE_V8SF_V8SF:
+ type = v8sf_ftype_v8sf_v8sf;
+ break;
+ case V8SF_FTYPE_V8SF_V8SI:
+ type = v8sf_ftype_v8sf_v8si;
+ break;
case V4SI_FTYPE_V4SI_V4SI:
case V4SI_FTYPE_V4SI_V4SI_COUNT:
type = v4si_ftype_v4si_v4si;
case V4HI_FTYPE_V4HI_SI_COUNT:
type = v4hi_ftype_v4hi_int;
break;
+ case V4DF_FTYPE_V4DF_V4DF:
+ type = v4df_ftype_v4df_v4df;
+ break;
+ case V4DF_FTYPE_V4DF_V4DI:
+ type = v4df_ftype_v4df_v4di;
+ break;
case V4SF_FTYPE_V4SF_V4SF:
case V4SF_FTYPE_V4SF_V4SF_SWAP:
type = v4sf_ftype_v4sf_v4sf;
break;
+ case V4SF_FTYPE_V4SF_V4SI:
+ type = v4sf_ftype_v4sf_v4si;
+ break;
case V4SF_FTYPE_V4SF_V2SI:
type = v4sf_ftype_v4sf_v2si;
break;
case V2DF_FTYPE_V2DF_V4SF:
type = v2df_ftype_v2df_v4sf;
break;
+ case V2DF_FTYPE_V2DF_V2DI:
+ type = v2df_ftype_v2df_v2di;
+ break;
case V2DF_FTYPE_V2DF_DI:
type = v2df_ftype_v2df_int64;
break;
case V8HI_FTYPE_V8HI_INT:
type = v8hi_ftype_v8hi_int;
break;
+ case V8SF_FTYPE_V8SF_INT:
+ type = v8sf_ftype_v8sf_int;
+ break;
case V4SI_FTYPE_V4SI_INT:
type = v4si_ftype_v4si_int;
break;
+ case V4SI_FTYPE_V8SI_INT:
+ type = v4si_ftype_v8si_int;
+ break;
case V4HI_FTYPE_V4HI_INT:
type = v4hi_ftype_v4hi_int;
break;
+ case V4DF_FTYPE_V4DF_INT:
+ type = v4df_ftype_v4df_int;
+ break;
case V4SF_FTYPE_V4SF_INT:
type = v4sf_ftype_v4sf_int;
break;
+ case V4SF_FTYPE_V8SF_INT:
+ type = v4sf_ftype_v8sf_int;
+ break;
case V2DI_FTYPE_V2DI_INT:
case V2DI2TI_FTYPE_V2DI_INT:
type = v2di_ftype_v2di_int;
case V2DF_FTYPE_V2DF_INT:
type = v2df_ftype_v2df_int;
break;
+ case V2DF_FTYPE_V4DF_INT:
+ type = v2df_ftype_v4df_int;
+ break;
case V16QI_FTYPE_V16QI_V16QI_V16QI:
type = v16qi_ftype_v16qi_v16qi_v16qi;
break;
+ case V8SF_FTYPE_V8SF_V8SF_V8SF:
+ type = v8sf_ftype_v8sf_v8sf_v8sf;
+ break;
+ case V4DF_FTYPE_V4DF_V4DF_V4DF:
+ type = v4df_ftype_v4df_v4df_v4df;
+ break;
case V4SF_FTYPE_V4SF_V4SF_V4SF:
type = v4sf_ftype_v4sf_v4sf_v4sf;
break;
case V16QI_FTYPE_V16QI_V16QI_INT:
type = v16qi_ftype_v16qi_v16qi_int;
break;
+ case V8SI_FTYPE_V8SI_V8SI_INT:
+ type = v8si_ftype_v8si_v8si_int;
+ break;
+ case V8SI_FTYPE_V8SI_V4SI_INT:
+ type = v8si_ftype_v8si_v4si_int;
+ break;
case V8HI_FTYPE_V8HI_V8HI_INT:
type = v8hi_ftype_v8hi_v8hi_int;
break;
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ type = v8sf_ftype_v8sf_v8sf_int;
+ break;
+ case V8SF_FTYPE_V8SF_V4SF_INT:
+ type = v8sf_ftype_v8sf_v4sf_int;
+ break;
case V4SI_FTYPE_V4SI_V4SI_INT:
type = v4si_ftype_v4si_v4si_int;
break;
+ case V4DF_FTYPE_V4DF_V4DF_INT:
+ type = v4df_ftype_v4df_v4df_int;
+ break;
+ case V4DF_FTYPE_V4DF_V2DF_INT:
+ type = v4df_ftype_v4df_v2df_int;
+ break;
case V4SF_FTYPE_V4SF_V4SF_INT:
type = v4sf_ftype_v4sf_v4sf_int;
break;
case V1DI2DI_FTYPE_V1DI_V1DI_INT:
type = v1di_ftype_v1di_v1di_int;
break;
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ type = v8sf_ftype_v8sf_v8sf_v8si_int;
+ break;
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
+ type = v4df_ftype_v4df_v4df_v4di_int;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
+ type = v4sf_ftype_v4sf_v4sf_v4si_int;
+ break;
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
+ type = v2df_ftype_v2df_v2df_v2di_int;
+ break;
default:
gcc_unreachable ();
}
/* PCLMUL */
def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
+ /* AVX */
+ def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
+ TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
+
/* Access to the vec_init patterns. */
ftype = build_function_type_list (V2SI_type_node, integer_type_node,
integer_type_node, NULL_TREE);
switch ((enum ix86_builtin_type) d->flag)
{
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ case INT_FTYPE_V4SF_V4SF_PTEST:
case INT_FTYPE_V2DI_V2DI_PTEST:
+ case INT_FTYPE_V2DF_V2DF_PTEST:
return ix86_expand_sse_ptest (d, exp, target);
case FLOAT128_FTYPE_FLOAT128:
case FLOAT_FTYPE_FLOAT:
case INT64_FTYPE_V2DF:
case INT_FTYPE_V16QI:
case INT_FTYPE_V8QI:
+ case INT_FTYPE_V8SF:
+ case INT_FTYPE_V4DF:
case INT_FTYPE_V4SF:
case INT_FTYPE_V2DF:
case V16QI_FTYPE_V16QI:
+ case V8SI_FTYPE_V8SF:
+ case V8SI_FTYPE_V4SI:
case V8HI_FTYPE_V8HI:
case V8HI_FTYPE_V16QI:
case V8QI_FTYPE_V8QI:
+ case V8SF_FTYPE_V8SF:
+ case V8SF_FTYPE_V8SI:
+ case V8SF_FTYPE_V4SF:
case V4SI_FTYPE_V4SI:
case V4SI_FTYPE_V16QI:
case V4SI_FTYPE_V4SF:
+ case V4SI_FTYPE_V8SI:
case V4SI_FTYPE_V8HI:
+ case V4SI_FTYPE_V4DF:
case V4SI_FTYPE_V2DF:
case V4HI_FTYPE_V4HI:
+ case V4DF_FTYPE_V4DF:
+ case V4DF_FTYPE_V4SI:
+ case V4DF_FTYPE_V4SF:
+ case V4DF_FTYPE_V2DF:
case V4SF_FTYPE_V4SF:
case V4SF_FTYPE_V4SI:
+ case V4SF_FTYPE_V8SF:
+ case V4SF_FTYPE_V4DF:
case V4SF_FTYPE_V2DF:
case V2DI_FTYPE_V2DI:
case V2DI_FTYPE_V16QI:
case V2DI_FTYPE_V4SI:
case V2DF_FTYPE_V2DF:
case V2DF_FTYPE_V4SI:
+ case V2DF_FTYPE_V4DF:
case V2DF_FTYPE_V4SF:
case V2DF_FTYPE_V2SI:
case V2SI_FTYPE_V2SI:
case V8HI_FTYPE_V8HI_V8HI:
case V8HI_FTYPE_V16QI_V16QI:
case V8HI_FTYPE_V4SI_V4SI:
+ case V8SF_FTYPE_V8SF_V8SF:
+ case V8SF_FTYPE_V8SF_V8SI:
case V4SI_FTYPE_V4SI_V4SI:
case V4SI_FTYPE_V8HI_V8HI:
case V4SI_FTYPE_V4SF_V4SF:
case V4HI_FTYPE_V4HI_V4HI:
case V4HI_FTYPE_V8QI_V8QI:
case V4HI_FTYPE_V2SI_V2SI:
+ case V4DF_FTYPE_V4DF_V4DF:
+ case V4DF_FTYPE_V4DF_V4DI:
case V4SF_FTYPE_V4SF_V4SF:
+ case V4SF_FTYPE_V4SF_V4SI:
case V4SF_FTYPE_V4SF_V2SI:
case V4SF_FTYPE_V4SF_V2DF:
case V4SF_FTYPE_V4SF_DI:
case V2SI_FTYPE_V2SF_V2SF:
case V2DF_FTYPE_V2DF_V2DF:
case V2DF_FTYPE_V2DF_V4SF:
+ case V2DF_FTYPE_V2DF_V2DI:
case V2DF_FTYPE_V2DF_DI:
case V2DF_FTYPE_V2DF_SI:
case V2SF_FTYPE_V2SF_V2SF:
nargs_constant = 1;
break;
case V8HI_FTYPE_V8HI_INT:
+ case V8SF_FTYPE_V8SF_INT:
case V4SI_FTYPE_V4SI_INT:
+ case V4SI_FTYPE_V8SI_INT:
case V4HI_FTYPE_V4HI_INT:
+ case V4DF_FTYPE_V4DF_INT:
case V4SF_FTYPE_V4SF_INT:
+ case V4SF_FTYPE_V8SF_INT:
case V2DI_FTYPE_V2DI_INT:
case V2DF_FTYPE_V2DF_INT:
+ case V2DF_FTYPE_V4DF_INT:
nargs = 2;
nargs_constant = 1;
break;
case V16QI_FTYPE_V16QI_V16QI_V16QI:
+ case V8SF_FTYPE_V8SF_V8SF_V8SF:
+ case V4DF_FTYPE_V4DF_V4DF_V4DF:
case V4SF_FTYPE_V4SF_V4SF_V4SF:
case V2DF_FTYPE_V2DF_V2DF_V2DF:
nargs = 3;
break;
case V16QI_FTYPE_V16QI_V16QI_INT:
case V8HI_FTYPE_V8HI_V8HI_INT:
+ case V8SI_FTYPE_V8SI_V8SI_INT:
+ case V8SI_FTYPE_V8SI_V4SI_INT:
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ case V8SF_FTYPE_V8SF_V4SF_INT:
case V4SI_FTYPE_V4SI_V4SI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_INT:
+ case V4DF_FTYPE_V4DF_V2DF_INT:
case V4SF_FTYPE_V4SF_V4SF_INT:
case V2DI_FTYPE_V2DI_V2DI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT:
nargs = 3;
nargs_constant = 2;
break;
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
+ nargs = 4;
+ nargs_constant = 1;
+ break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
case CODE_FOR_sse4_1_blendps:
+ case CODE_FOR_avx_blendpd256:
+ case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx_roundpd256:
+ case CODE_FOR_avx_roundps256:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_sse4_1_blendpd:
+ case CODE_FOR_avx_vpermilv2df:
+ case CODE_FOR_avx_vpermil2v2df3:
+ case CODE_FOR_avx_vpermil2v4sf3:
+ case CODE_FOR_avx_vpermil2v4df3:
+ case CODE_FOR_avx_vpermil2v8sf3:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
+ case CODE_FOR_avx_vextractf128v4df:
+ case CODE_FOR_avx_vextractf128v8sf:
+ case CODE_FOR_avx_vextractf128v8si:
+ case CODE_FOR_avx_vinsertf128v4df:
+ case CODE_FOR_avx_vinsertf128v8sf:
+ case CODE_FOR_avx_vinsertf128v8si:
+ error ("the last argument must be a 1-bit immediate");
+ return const0_rtx;
+
+ case CODE_FOR_avx_cmpsdv2df3:
+ case CODE_FOR_avx_cmpssv4sf3:
+ case CODE_FOR_avx_cmppdv2df3:
+ case CODE_FOR_avx_cmppsv4sf3:
+ case CODE_FOR_avx_cmppdv4df3:
+ case CODE_FOR_avx_cmppsv8sf3:
+ error ("the last argument must be a 5-bit immediate");
+ return const0_rtx;
+
default:
switch (nargs_constant)
{
emit_insn (GEN_FCN (icode) (target));
return 0;
case V2DI_FTYPE_PV2DI:
+ case V32QI_FTYPE_PCCHAR:
case V16QI_FTYPE_PCCHAR:
+ case V8SF_FTYPE_PCV4SF:
+ case V8SF_FTYPE_PCFLOAT:
case V4SF_FTYPE_PCFLOAT:
+ case V4DF_FTYPE_PCV2DF:
+ case V4DF_FTYPE_PCDOUBLE:
case V2DF_FTYPE_PCDOUBLE:
nargs = 1;
klass = load;
break;
case VOID_FTYPE_PV2SF_V4SF:
case VOID_FTYPE_PV2DI_V2DI:
+ case VOID_FTYPE_PCHAR_V32QI:
case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V8SF:
case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V4DF:
case VOID_FTYPE_PDOUBLE_V2DF:
case VOID_FTYPE_PDI_DI:
case VOID_FTYPE_PINT_INT:
klass = load;
memory = 1;
break;
+ case V8SF_FTYPE_PCV8SF_V8SF:
+ case V4DF_FTYPE_PCV4DF_V4DF:
+ case V4SF_FTYPE_PCV4SF_V4SF:
+ case V2DF_FTYPE_PCV2DF_V2DF:
+ nargs = 2;
+ klass = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV8SF_V8SF_V8SF:
+ case VOID_FTYPE_PV4DF_V4DF_V4DF:
+ case VOID_FTYPE_PV4SF_V4SF_V4SF:
+ case VOID_FTYPE_PV2DF_V2DF_V2DF:
+ nargs = 2;
+ klass = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
default:
gcc_unreachable ();
}
{
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
- are available. */
- return (VALID_SSE_REG_MODE (mode)
+ are available. OImode move is available only when AVX is
+ enabled. */
+ return ((TARGET_AVX && mode == OImode)
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_SSE_REG_MODE (mode)
|| VALID_SSE2_REG_MODE (mode)
|| VALID_MMX_REG_MODE (mode)
|| VALID_MMX_REG_MODE_3DNOW (mode));
bool
x86_extended_reg_mentioned_p (rtx insn)
{
- return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
+ return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
+ extended_reg_mentioned_1, NULL);
}
/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
rtx target, rtx val)
{
- enum machine_mode smode, wsmode, wvmode;
+ enum machine_mode hmode, smode, wsmode, wvmode;
rtx x;
switch (mode)
emit_move_insn (target, gen_lowpart (mode, x));
return true;
+ case V4DFmode:
+ hmode = V2DFmode;
+ goto half;
+ case V4DImode:
+ hmode = V2DImode;
+ goto half;
+ case V8SFmode:
+ hmode = V4SFmode;
+ goto half;
+ case V8SImode:
+ hmode = V4SImode;
+ goto half;
+ case V16HImode:
+ hmode = V8HImode;
+ goto half;
+ case V32QImode:
+ hmode = V16QImode;
+ goto half;
+half:
+ {
+ rtx tmp = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
+ }
+ return true;
+
default:
return false;
}
case V4HImode:
use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
break;
+ case V32QImode:
+ case V16HImode:
+ case V8SImode:
+ case V8SFmode:
+ case V4DImode:
+ case V4DFmode:
+ use_vector_set = TARGET_AVX;
+ break;
default:
break;
}
the general case. */
return false;
+ case V4DFmode:
+ case V4DImode:
+ case V8SFmode:
+ case V8SImode:
+ case V16HImode:
+ case V32QImode:
case V4SFmode:
case V4SImode:
case V8HImode:
rtx target, rtx *ops, int n)
{
enum machine_mode cmode, hmode = VOIDmode;
- rtx first[4], second[2];
+ rtx first[8], second[4];
rtvec v;
int i, j;
case 2:
switch (mode)
{
+ case V8SImode:
+ cmode = V4SImode;
+ break;
+ case V8SFmode:
+ cmode = V4SFmode;
+ break;
+ case V4DImode:
+ cmode = V2DImode;
+ break;
+ case V4DFmode:
+ cmode = V2DFmode;
+ break;
case V4SImode:
cmode = V2SImode;
break;
case 4:
switch (mode)
{
+ case V4DImode:
+ cmode = V2DImode;
+ break;
+ case V4DFmode:
+ cmode = V2DFmode;
+ break;
case V4SImode:
cmode = V2SImode;
break;
}
goto half;
+ case 8:
+ switch (mode)
+ {
+ case V8SImode:
+ cmode = V2SImode;
+ hmode = V4SImode;
+ break;
+ case V8SFmode:
+ cmode = V2SFmode;
+ hmode = V4SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
half:
/* FIXME: We process inputs backward to help RA. PR 36222. */
i = n - 1;
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- rtx ops[16];
+ rtx ops[32], op0, op1;
+ enum machine_mode half_mode = VOIDmode;
int n, i;
switch (mode)
break;
/* FALLTHRU */
+ case V8SFmode:
+ case V8SImode:
+ case V4DFmode:
+ case V4DImode:
case V4SFmode:
case V4SImode:
case V2DFmode:
ix86_expand_vector_init_concat (mode, target, ops, n);
return;
+ case V32QImode:
+ half_mode = V16QImode;
+ goto half;
+
+ case V16HImode:
+ half_mode = V8HImode;
+ goto half;
+
+half:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ op0 = gen_reg_rtx (half_mode);
+ op1 = gen_reg_rtx (half_mode);
+ ix86_expand_vector_init_interleave (half_mode, op0, ops,
+ n >> 2);
+ ix86_expand_vector_init_interleave (half_mode, op1,
+ &ops [n >> 1], n >> 2);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op0, op1)));
+ return;
+
case V16QImode:
if (!TARGET_SSE4_1)
break;
{
enum machine_mode mode = GET_MODE (target);
enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ enum machine_mode half_mode;
bool use_vec_merge = false;
rtx tmp;
+ static rtx (*gen_extract[6][2]) (rtx, rtx)
+ = {
+ { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
+ { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
+ { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
+ { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
+ { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
+ { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
+ };
+ static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
+ = {
+ { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
+ { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
+ { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
+ { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
+ { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
+ { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
+ };
+ int i, j, n;
switch (mode)
{
break;
case V8QImode:
+ break;
+
+ case V32QImode:
+ half_mode = V16QImode;
+ j = 0;
+ n = 16;
+ goto half;
+
+ case V16HImode:
+ half_mode = V8HImode;
+ j = 1;
+ n = 8;
+ goto half;
+
+ case V8SImode:
+ half_mode = V4SImode;
+ j = 2;
+ n = 4;
+ goto half;
+
+ case V4DImode:
+ half_mode = V2DImode;
+ j = 3;
+ n = 2;
+ goto half;
+
+ case V8SFmode:
+ half_mode = V4SFmode;
+ j = 4;
+ n = 4;
+ goto half;
+
+ case V4DFmode:
+ half_mode = V2DFmode;
+ j = 5;
+ n = 2;
+ goto half;
+
+half:
+ /* Compute which 128-bit half ELT is in and its index within
+ that half.  */
+ i = elt / n;
+ elt %= n;
+
+ gcc_assert (i <= 1);
+
+ /* Extract the half. */
+ tmp = gen_reg_rtx (half_mode);
+ emit_insn ((*gen_extract[j][i]) (tmp, target));
+
+ /* Put val in tmp at elt. */
+ ix86_expand_vector_set (false, tmp, val, elt);
+
+ /* Put it back. */
+ emit_insn ((*gen_insert[j][i]) (target, target, tmp));
+ return;
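
For illustration, here is a minimal standalone sketch of the index arithmetic used by the "half:" label above. With n elements per 128-bit half, elt / n selects the low or high half and elt % n is the position inside it; for V8SFmode n is 4, so element 6 sits at position 2 of the high half. The helper and driver names below are invented for the example.

#include <stdio.h>

/* Illustrative only: map a 256-bit vector element index to
   (half, index-within-half), mirroring i = elt / n; elt %= n;
   where n is the element count of one 128-bit half.  */
static void
split_index (int elt, int n, int *half, int *sub)
{
  *half = elt / n;   /* 0 = low 128 bits, 1 = high 128 bits */
  *sub = elt % n;    /* position inside the selected half */
}

int
main (void)
{
  int half, sub;
  split_index (6, 4, &half, &sub);  /* V8SFmode: n = 4 */
  printf ("V8SF elt 6 -> half %d, elt %d\n", half, sub);   /* 1, 2 */
  split_index (1, 2, &half, &sub);  /* V4DFmode: n = 2 */
  printf ("V4DF elt 1 -> half %d, elt %d\n", half, sub);   /* 0, 1 */
  return 0;
}
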
+
default:
break;
}
return true;
if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
return true;
+ if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
+ return true;
if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
return true;
if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
#define TARGET_SSSE3 OPTION_ISA_SSSE3
#define TARGET_SSE4_1 OPTION_ISA_SSE4_1
#define TARGET_SSE4_2 OPTION_ISA_SSE4_2
+#define TARGET_AVX OPTION_ISA_AVX
+#define TARGET_FMA OPTION_ISA_FMA
#define TARGET_SSE4A OPTION_ISA_SSE4A
#define TARGET_SSE5 OPTION_ISA_SSE5
#define TARGET_ROUND OPTION_ISA_ROUND
Pentium+ prefers DFmode values to be aligned to 64 bit boundary
and Pentium Pro XFmode values at 128 bit boundaries. */
-#define BIGGEST_ALIGNMENT 128
+#define BIGGEST_ALIGNMENT (TARGET_AVX ? 256 : 128)
/* Maximum stack alignment. */
#define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT
#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
+#define VALID_AVX256_REG_MODE(MODE) \
+ ((MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
+ || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode)
+
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
|| (MODE) == V2DImode || (MODE) == DFmode)
|| (MODE) == V4HImode || (MODE) == V8QImode)
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
- place emms and femms instructions. */
-#define UNITS_PER_SIMD_WORD(MODE) (TARGET_SSE ? 16 : UNITS_PER_WORD)
+ place emms and femms instructions.
+ FIXME: AVX has 32-byte floating point vector operations and 16-byte
+ integer vector operations.  But the vectorizer doesn't support
+ different sizes for integer and floating point vectors, so we limit
+ the vector size to 16 bytes.  */
+#define UNITS_PER_SIMD_WORD(MODE) \
+ (TARGET_AVX ? (((MODE) == DFmode || (MODE) == SFmode) ? 16 : 16) \
+ : (TARGET_SSE ? 16 : UNITS_PER_WORD))
#define VALID_DFP_MODE_P(MODE) \
((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
#define SSE_REG_MODE_P(MODE) \
((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode \
|| (MODE) == V8HImode || (MODE) == V2DFmode || (MODE) == V2DImode \
- || (MODE) == V4SFmode || (MODE) == V4SImode)
+ || (MODE) == V4SFmode || (MODE) == V4SImode || (MODE) == V32QImode \
+ || (MODE) == V16HImode || (MODE) == V8SImode || (MODE) == V4DImode \
+ || (MODE) == V8SFmode || (MODE) == V4DFmode)
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
#define SSE_VEC_FLOAT_MODE_P(MODE) \
((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
+#define AVX_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == SFmode || (MODE) == DFmode))
+
+#define AVX128_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode))
+
+#define AVX256_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V8SFmode || (MODE) == V4DFmode))
+
+#define AVX_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode \
+ || (MODE) == V8SFmode || (MODE) == V4DFmode))
+
#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
#define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
int fastcall; /* fastcall calling convention is used */
int sse_words; /* # sse words passed so far */
int sse_nregs; /* # sse registers available for passing */
+ int warn_avx; /* True when we want to warn about AVX ABI. */
int warn_sse; /* True when we want to warn about SSE ABI. */
int warn_mmx; /* True when we want to warn about MMX ABI. */
int sse_regno; /* next available sse register number */
#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
+/* When we see %v, we print the 'v' prefix if TARGET_AVX is true
+ and drop it otherwise.  */
+
+#define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR) \
+{ \
+ if ((PTR)[0] == '%' && (PTR)[1] == 'v') \
+ { \
+ if (TARGET_AVX) \
+ (PTR) += 1; \
+ else \
+ (PTR) += 2; \
+ } \
+}
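
As a minimal standalone sketch, this is what the macro above amounts to when an opcode template is scanned: under AVX the pointer skips only the '%' so the 'v' is kept, otherwise it skips the whole "%v" marker, turning a template such as "%vmovaps" into "vmovaps" or "movaps". The function and variable names below are invented for the example.

#include <stdio.h>

static int target_avx = 1;   /* stand-in for the real TARGET_AVX flag */

/* Mimics ASM_OUTPUT_AVX_PREFIX: keep the 'v' under AVX, drop "%v" otherwise.  */
static const char *
strip_v_prefix (const char *ptr)
{
  if (ptr[0] == '%' && ptr[1] == 'v')
    ptr += target_avx ? 1 : 2;
  return ptr;
}

int
main (void)
{
  target_avx = 1;
  printf ("%s\n", strip_v_prefix ("%vmovaps"));   /* prints "vmovaps" */
  target_avx = 0;
  printf ("%s\n", strip_v_prefix ("%vmovaps"));   /* prints "movaps" */
  return 0;
}
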
+
+/* A C statement or statements which output an assembler instruction
+ opcode to the stdio stream STREAM. The macro-operand PTR is a
+ variable of type `char *' which points to the opcode name in
+ its "internal" form--the form that is written in the machine
+ description. */
+
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+ ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR))
+
/* Under some conditions we need jump tables in the text section,
because the assembler cannot handle label differences between
sections. This is the case for x86_64 on Mach-O for example. */
; For PCLMUL support
(UNSPEC_PCLMUL 165)
+
+ ; For AVX support
+ (UNSPEC_PCMP 166)
+ (UNSPEC_VPERMIL 167)
+ (UNSPEC_VPERMIL2 168)
+ (UNSPEC_VPERMIL2F128 169)
+ (UNSPEC_MASKLOAD 170)
+ (UNSPEC_MASKSTORE 171)
+ (UNSPEC_CAST 172)
+ (UNSPEC_VTESTP 173)
])
(define_constants
(UNSPECV_LOCK 13)
(UNSPECV_PROLOGUE_USE 14)
(UNSPECV_CLD 15)
+ (UNSPECV_VZEROALL 16)
+ (UNSPECV_VZEROUPPER 17)
])
;; Constants to represent pcomtrue/pcomfalse variants
(FLAGS_REG 17)
(FPSR_REG 18)
(FPCR_REG 19)
+ (XMM0_REG 21)
+ (XMM1_REG 22)
+ (XMM2_REG 23)
+ (XMM3_REG 24)
+ (XMM4_REG 25)
+ (XMM5_REG 26)
+ (XMM6_REG 27)
+ (XMM7_REG 28)
(R10_REG 39)
(R11_REG 40)
(R13_REG 42)
+ (XMM8_REG 45)
+ (XMM9_REG 46)
+ (XMM10_REG 47)
+ (XMM11_REG 48)
+ (XMM12_REG 49)
+ (XMM13_REG 50)
+ (XMM14_REG 51)
+ (XMM15_REG 52)
])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
;; Main data type used by the insn
(define_attr "mode"
- "unknown,none,QI,HI,SI,DI,TI,SF,DF,XF,TF,V4SF,V2DF,V2SF,V1DF"
+ "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
(const_string "unknown"))
;; The CPU unit operations uses.
;; There are also additional prefixes in SSSE3.
(define_attr "prefix_extra" "" (const_int 0))
+;; Instruction prefix used: original (legacy), VEX, or maybe VEX,
+;; where maybe VEX means VEX is used only when AVX is enabled.
+(define_attr "prefix" "orig,vex,maybe_vex"
+ (if_then_else (eq_attr "mode" "OI,V8SF,V4DF")
+ (const_string "vex")
+ (const_string "orig")))
+
+;; There is an 8-bit immediate for VEX.
+(define_attr "prefix_vex_imm8" "" (const_int 0))
+
+;; VEX W bit is used.
+(define_attr "prefix_vex_w" "" (const_int 0))
+
+;; The length of the VEX prefix.
+(define_attr "length_vex" ""
+ (if_then_else (eq_attr "prefix_0f" "1")
+ (if_then_else (eq_attr "prefix_vex_w" "1")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 1, 1)")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 1, 0)"))
+ (if_then_else (eq_attr "prefix_vex_w" "1")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 0, 1)")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 0, 0)"))))
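
For illustration, a hypothetical sketch of the kind of value length_vex stands for. The assumption, suggested by the "length" formula below (which adds only the modrm byte, any VEX immediate and the address bytes on top of length_vex), is that length_vex covers the VEX prefix plus the single opcode byte. The 2-byte VEX form (C5) is usable only for the 0F opcode map with VEX.W = 0 and no need for the X/B register-extension bits; otherwise the 3-byte form (C4) is required. The function below is invented for the example and is not the real ix86_attr_length_vex_default.

#include <stdio.h>

/* Illustrative only: 2-byte or 3-byte VEX prefix plus one opcode byte.  */
static int
vex_opcode_bytes (int only_0f_map, int vex_w, int needs_xb)
{
  int prefix = (only_0f_map && !vex_w && !needs_xb) ? 2 : 3;
  return prefix + 1;
}

int
main (void)
{
  /* vmovaps %xmm1, %xmm0 fits the 2-byte form (3 bytes before modrm);
     a VEX.W=1 instruction needs the 3-byte form (4 bytes).  */
  printf ("%d %d\n", vex_opcode_bytes (1, 0, 0), vex_opcode_bytes (1, 1, 0));
  return 0;
}
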
+
;; Set when modrm byte is used.
(define_attr "modrm" ""
(cond [(eq_attr "type" "str,leave")
(eq_attr "unit" "i387")
(plus (const_int 2)
(plus (attr "prefix_data16")
- (attr "length_address")))]
+ (attr "length_address")))
+ (ior (eq_attr "prefix" "vex")
+ (and (eq_attr "prefix" "maybe_vex")
+ (ne (symbol_ref "TARGET_AVX") (const_int 0))))
+ (plus (attr "length_vex")
+ (plus (attr "prefix_vex_imm8")
+ (plus (attr "modrm")
+ (attr "length_address"))))]
(plus (plus (attr "modrm")
(plus (attr "prefix_0f")
(plus (attr "prefix_rex")
;; Mapping of unsigned max and min
(define_code_iterator umaxmin [umax umin])
+;; Mapping of signed/unsigned max and min
+(define_code_iterator maxmin [smax smin umax umin])
+
;; Base name for integer and FP insn mnemonic
(define_code_attr maxminiprefix [(smax "maxs") (smin "mins")
(umax "maxu") (umin "minu")])
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 0);"
[(set_attr "type" "fcmp,ssecomi")
+ (set_attr "prefix" "orig,maybe_vex")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 0);"
[(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
[(set_attr "type" "fcmp,ssecomi")
+ (set_attr "prefix" "orig,maybe_vex")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
&& GET_MODE (operands[0]) == GET_MODE (operands[1])"
"* return output_fp_compare (insn, operands, 1, 1);"
[(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
(set (attr "mode")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
{
case TYPE_SSELOG1:
if (get_attr_mode (insn) == MODE_TI)
- return "pxor\t%0, %0";
- return "xorps\t%0, %0";
+ return "%vpxor\t%0, %d0";
+ return "%vxorps\t%0, %d0";
case TYPE_SSEMOV:
switch (get_attr_mode (insn))
{
case MODE_TI:
- return "movdqa\t{%1, %0|%0, %1}";
+ return "%vmovdqa\t{%1, %0|%0, %1}";
case MODE_V4SF:
- return "movaps\t{%1, %0|%0, %1}";
+ return "%vmovaps\t{%1, %0|%0, %1}";
case MODE_SI:
- return "movd\t{%1, %0|%0, %1}";
+ return "%vmovd\t{%1, %0|%0, %1}";
case MODE_SF:
- return "movss\t{%1, %0|%0, %1}";
+ return "%vmovss\t{%1, %0|%0, %1}";
default:
gcc_unreachable ();
}
(const_string "lea")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4,5")
+ (const_string "orig")
+ (const_string "maybe_vex")))
(set (attr "mode")
(cond [(eq_attr "alternative" "2,3")
(const_string "DI")
pxor\t%0, %0
movq\t{%1, %0|%0, %1}
movq\t{%1, %0|%0, %1}
- pxor\t%0, %0
- movq\t{%1, %0|%0, %1}
- movdqa\t{%1, %0|%0, %1}
- movq\t{%1, %0|%0, %1}
+ %vpxor\t%0, %d0
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovdqa\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
xorps\t%0, %0
movlps\t{%1, %0|%0, %1}
movaps\t{%1, %0|%0, %1}
movlps\t{%1, %0|%0, %1}"
[(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7,8")
+ (const_string "vex")
+ (const_string "orig")))
(set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
(define_split
return "movdq2q\t{%1, %0|%0, %1}";
case TYPE_SSEMOV:
+ if (TARGET_AVX)
+ {
+ if (get_attr_mode (insn) == MODE_TI)
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ else
+ return "vmovq\t{%1, %0|%0, %1}";
+ }
+
if (get_attr_mode (insn) == MODE_TI)
return "movdqa\t{%1, %0|%0, %1}";
/* FALLTHRU */
return "movq\t{%1, %0|%0, %1}";
case TYPE_SSELOG1:
+ return "%vpxor\t%0, %d0";
+
case TYPE_MMXADD:
return "pxor\t%0, %0";
(const_string "imov")))
(set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "11,12,13,14,15,16")
+ (const_string "maybe_vex")
+ (const_string "orig")))
(set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
;; Stores and loads of ax to arbitrary constant address.
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "double")])
+(define_expand "movoi"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "")
+ (match_operand:OI 1 "general_operand" ""))]
+ "TARGET_AVX"
+ "ix86_expand_move (OImode, operands); DONE;")
+
+(define_insn "*movoi_internal"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "vxorps\t%0, %0, %0";
+ case 1:
+ case 2:
+ if (misaligned_operand (operands[0], OImode)
+ || misaligned_operand (operands[1], OImode))
+ return "vmovdqu\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_expand "movti"
[(set (match_operand:TI 0 "nonimmediate_operand" "")
(match_operand:TI 1 "nonimmediate_operand" ""))]
{
case 0:
if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
else
- return "pxor\t%0, %0";
+ return "%vpxor\t%0, %d0";
case 1:
case 2:
/* TDmode values are passed as TImode on the stack. Moving them
|| misaligned_operand (operands[1], TImode))
{
if (get_attr_mode (insn) == MODE_V4SF)
- return "movups\t{%1, %0|%0, %1}";
+ return "%vmovups\t{%1, %0|%0, %1}";
else
- return "movdqu\t{%1, %0|%0, %1}";
+ return "%vmovdqu\t{%1, %0|%0, %1}";
}
else
{
if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ return "%vmovaps\t{%1, %0|%0, %1}";
else
- return "movdqa\t{%1, %0|%0, %1}";
+ return "%vmovdqa\t{%1, %0|%0, %1}";
}
default:
gcc_unreachable ();
}
}
[(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
(ne (symbol_ref "optimize_size") (const_int 0)))
return "#";
case 2:
if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
else
- return "pxor\t%0, %0";
+ return "%vpxor\t%0, %d0";
case 3:
case 4:
/* TDmode values are passed as TImode on the stack. Moving them
|| misaligned_operand (operands[1], TImode))
{
if (get_attr_mode (insn) == MODE_V4SF)
- return "movups\t{%1, %0|%0, %1}";
+ return "%vmovups\t{%1, %0|%0, %1}";
else
- return "movdqu\t{%1, %0|%0, %1}";
+ return "%vmovdqu\t{%1, %0|%0, %1}";
}
else
{
if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ return "%vmovaps\t{%1, %0|%0, %1}";
else
- return "movdqa\t{%1, %0|%0, %1}";
+ return "%vmovdqa\t{%1, %0|%0, %1}";
}
default:
gcc_unreachable ();
}
}
[(set_attr "type" "*,*,sselog1,ssemov,ssemov")
+ (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex")
(set (attr "mode")
(cond [(eq_attr "alternative" "2,3")
(if_then_else
return "mov{l}\t{%1, %0|%0, %1}";
case 5:
if (get_attr_mode (insn) == MODE_TI)
- return "pxor\t%0, %0";
+ return "%vpxor\t%0, %d0";
else
- return "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
case 6:
if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovss\t{%1, %d0|%d0, %1}";
+ case 7:
+ if (TARGET_AVX)
+ return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
+ : "vmovss\t{%1, %0|%0, %1}";
else
return "movss\t{%1, %0|%0, %1}";
- case 7: case 8:
- return "movss\t{%1, %0|%0, %1}";
+ case 8:
+ return "%vmovss\t{%1, %0|%0, %1}";
- case 9: case 10:
- case 12: case 13: case 14: case 15:
+ case 9: case 10: case 14: case 15:
return "movd\t{%1, %0|%0, %1}";
+ case 12: case 13:
+ return "%vmovd\t{%1, %0|%0, %1}";
case 11:
return "movq\t{%1, %0|%0, %1}";
}
}
[(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7,8,12,13")
+ (const_string "maybe_vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "alternative" "3,4,9,10")
(const_string "SI")
switch (get_attr_mode (insn))
{
case MODE_V4SF:
- return "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
case MODE_V2DF:
- return "xorpd\t%0, %0";
+ return "%vxorpd\t%0, %d0";
case MODE_TI:
- return "pxor\t%0, %0";
+ return "%vpxor\t%0, %d0";
default:
gcc_unreachable ();
}
switch (get_attr_mode (insn))
{
case MODE_V4SF:
- return "movaps\t{%1, %0|%0, %1}";
+ return "%vmovaps\t{%1, %0|%0, %1}";
case MODE_V2DF:
- return "movapd\t{%1, %0|%0, %1}";
+ return "%vmovapd\t{%1, %0|%0, %1}";
case MODE_TI:
- return "movdqa\t{%1, %0|%0, %1}";
+ return "%vmovdqa\t{%1, %0|%0, %1}";
case MODE_DI:
- return "movq\t{%1, %0|%0, %1}";
+ return "%vmovq\t{%1, %0|%0, %1}";
case MODE_DF:
- return "movsd\t{%1, %0|%0, %1}";
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovsd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movsd\t{%1, %0|%0, %1}";
case MODE_V1DF:
- return "movlpd\t{%1, %0|%0, %1}";
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]))
+ return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovlpd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movlpd\t{%1, %0|%0, %1}";
case MODE_V2SF:
- return "movlps\t{%1, %0|%0, %1}";
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]))
+ return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovlps\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movlps\t{%1, %0|%0, %1}";
default:
gcc_unreachable ();
}
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+ (const_string "orig")
+ (const_string "maybe_vex")))
(set (attr "mode")
(cond [(eq_attr "alternative" "0,1,2")
(const_string "DF")
switch (get_attr_mode (insn))
{
case MODE_V4SF:
- return "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
case MODE_V2DF:
- return "xorpd\t%0, %0";
+ return "%vxorpd\t%0, %d0";
case MODE_TI:
- return "pxor\t%0, %0";
+ return "%vpxor\t%0, %d0";
default:
gcc_unreachable ();
}
switch (get_attr_mode (insn))
{
case MODE_V4SF:
- return "movaps\t{%1, %0|%0, %1}";
+ return "%vmovaps\t{%1, %0|%0, %1}";
case MODE_V2DF:
- return "movapd\t{%1, %0|%0, %1}";
+ return "%vmovapd\t{%1, %0|%0, %1}";
case MODE_TI:
- return "movdqa\t{%1, %0|%0, %1}";
+ return "%vmovdqa\t{%1, %0|%0, %1}";
case MODE_DI:
- return "movq\t{%1, %0|%0, %1}";
+ return "%vmovq\t{%1, %0|%0, %1}";
case MODE_DF:
- return "movsd\t{%1, %0|%0, %1}";
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovsd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movsd\t{%1, %0|%0, %1}";
case MODE_V1DF:
- return "movlpd\t{%1, %0|%0, %1}";
+ return "%vmovlpd\t{%1, %d0|%d0, %1}";
case MODE_V2SF:
- return "movlps\t{%1, %0|%0, %1}";
+ return "%vmovlps\t{%1, %d0|%d0, %1}";
default:
gcc_unreachable ();
}
case 9:
case 10:
- return "movd\t{%1, %0|%0, %1}";
+ return "%vmovd\t{%1, %0|%0, %1}";
default:
gcc_unreachable();
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+ (const_string "orig")
+ (const_string "maybe_vex")))
(set (attr "mode")
(cond [(eq_attr "alternative" "0,1,2")
(const_string "DF")
case 0:
case 1:
if (get_attr_mode (insn) == MODE_V4SF)
- return "movaps\t{%1, %0|%0, %1}";
+ return "%vmovaps\t{%1, %0|%0, %1}";
else
- return "movdqa\t{%1, %0|%0, %1}";
+ return "%vmovdqa\t{%1, %0|%0, %1}";
case 2:
if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
else
- return "pxor\t%0, %0";
+ return "%vpxor\t%0, %d0";
case 3:
case 4:
return "#";
}
}
[(set_attr "type" "ssemov,ssemov,sselog1,*,*")
+ (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*")
(set (attr "mode")
(cond [(eq_attr "alternative" "0,2")
(if_then_else
#
movd\t{%1, %0|%0, %1}
movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}"
- [(set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")
- (set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")])
+ %vmovd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")
+ (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex")
+ (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")])
(define_insn "zero_extendsidi2_rex64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2")
#
movd\t{%1, %0|%0, %1}
movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}"
+ %vmovd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}"
[(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
+ (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
(set_attr "mode" "SI,DI,DI,DI,TI,TI")])
(define_split
return output_387_reg_move (insn, operands);
case 2:
- return "cvtss2sd\t{%1, %0|%0, %1}";
+ return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "fmov,fmov,ssecvt")
+ (set_attr "prefix" "orig,orig,maybe_vex")
(set_attr "mode" "SF,XF,DF")])
(define_insn "*extendsfdf2_sse"
[(set (match_operand:DF 0 "nonimmediate_operand" "=x")
(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2 && TARGET_SSE_MATH"
- "cvtss2sd\t{%1, %0|%0, %1}"
+ "%vcvtss2sd\t{%1, %d0|%d0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DF")])
(define_insn "*extendsfdf2_i387"
case 0:
return output_387_reg_move (insn, operands);
case 1:
- return "cvtsd2ss\t{%1, %0|%0, %1}";
+ return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "fmov,ssecvt")
+ (set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "SF")])
;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2 && TARGET_SSE_MATH"
- "cvtsd2ss\t{%1, %0|%0, %1}"
+ "%vcvtsd2ss\t{%1, %d0|%d0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")])
(define_insn "*truncdfsf_fast_i387"
case 1:
return "#";
case 2:
- return "cvtsd2ss\t{%1, %0|%0, %1}";
+ return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "fmov,multi,ssecvt")
(set_attr "unit" "*,i387,*")
+ (set_attr "prefix" "orig,orig,maybe_vex")
(set_attr "mode" "SF")])
(define_insn "*truncdfsf_i387"
(fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
"TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
&& (!TARGET_FISTTP || TARGET_SSE_MATH)"
- "cvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
+ "%vcvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")])
(fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
"SSE_FLOAT_MODE_P (<MODE>mode)
&& (!TARGET_FISTTP || TARGET_SSE_MATH)"
- "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
+ "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")])
&& (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"@
fild%z1\t%1
- cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}
- cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
[(set_attr "type" "fmov,sseicvt,sseicvt")
+ (set_attr "prefix" "orig,maybe_vex,maybe_vex")
(set_attr "mode" "<MODEF:MODE>")
(set_attr "unit" "i387,*,*")
(set_attr "athlon_decode" "*,double,direct")
&& !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"@
fild%z1\t%1
- cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
[(set_attr "type" "fmov,sseicvt")
+ (set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "<MODEF:MODE>")
(set_attr "athlon_decode" "*,direct")
(set_attr "amdfam10_decode" "*,double")
"(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
&& SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
- "cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
+ "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODEF:MODE>")
(set_attr "athlon_decode" "double,direct")
(set_attr "amdfam10_decode" "vector,double")
"(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
&& SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
&& !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
- "cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
+ "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODEF:MODE>")
(set_attr "athlon_decode" "direct")
(set_attr "amdfam10_decode" "double")
;; This pattern must be defined before *ashlti3_1 to prevent
;; combine pass from converting sse2_ashlti3 to *ashlti3_1.
+(define_insn "*avx_ashlti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (ashift:TI (match_operand:TI 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
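The *avx_ashlti3 pattern above keeps the shift amount in bits in the RTL
(as sse2_ashlti3 below does) and converts it to the byte count that
vpslldq actually takes by dividing the immediate by 8.  A minimal sketch
of code exercising this path, assuming only the SSE2 intrinsic
_mm_slli_si128 (whose immediate is already a byte count); the function
name is illustrative:

    /* Built with -mavx, this is expected to match *avx_ashlti3 and emit
       vpslldq with an immediate of 8 (8 bytes == 64 bits).  */
    #include <emmintrin.h>

    __m128i
    shift_left_64_bits (__m128i x)
    {
      return _mm_slli_si128 (x, 8);
    }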
(define_insn "sse2_ashlti3"
[(set (match_operand:TI 0 "register_operand" "=x")
(ashift:TI (match_operand:TI 1 "register_operand" "0")
;; This pattern must be defined before *lshrti3_1 to prevent
;; combine pass from converting sse2_lshrti3 to *lshrti3_1.
+(define_insn "*avx_lshrti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_lshrti3"
[(set (match_operand:TI 0 "register_operand" "=x")
(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
;; 0xffffffff is NaN, but not in normalized form, so we can't represent
;; it directly.
+(define_insn "*avx_setcc<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 1 "avx_comparison_float_operator"
+ [(match_operand:MODEF 2 "register_operand" "x")
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_AVX"
+ "vcmp%D1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*sse_setcc<mode>"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(match_operator:MODEF 1 "sse_comparison_operator"
;; Gcc is slightly more smart about handling normal two address instructions
;; so use special patterns for add and mull.
+(define_insn "*fop_<mode>_comm_mixed_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop"))))
+ (set_attr "prefix" "orig,maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*fop_<mode>_comm_mixed"
[(set (match_operand:MODEF 0 "register_operand" "=f,x")
(match_operator:MODEF 3 "binary_fp_operator"
(const_string "fop"))))
(set_attr "mode" "<MODE>")])
+(define_insn "*fop_<mode>_comm_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*fop_<mode>_comm_sse"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(match_operator:MODEF 3 "binary_fp_operator"
(const_string "fop")))
(set_attr "mode" "<MODE>")])
+(define_insn "*fop_<mode>_1_mixed_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
+ (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "prefix" "orig,orig,maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*fop_<mode>_1_mixed"
[(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
(match_operator:MODEF 3 "binary_fp_operator"
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
UNSPEC_RCP))]
"TARGET_SSE_MATH"
- "rcpss\t{%1, %0|%0, %1}"
+ "%vrcpss\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")])
+(define_insn "*fop_<mode>_1_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*fop_<mode>_1_sse"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(match_operator:MODEF 3 "binary_fp_operator"
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
UNSPEC_RSQRT))]
"TARGET_SSE_MATH"
- "rsqrtss\t{%1, %0|%0, %1}"
+ "%vrsqrtss\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")])
(define_expand "rsqrtsf2"
(sqrt:MODEF
(match_operand:MODEF 1 "nonimmediate_operand" "xm")))]
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
- "sqrts<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")
(set_attr "athlon_decode" "*")
(set_attr "amdfam10_decode" "*")])
(match_operand:SI 2 "const_0_to_15_operand" "n")]
UNSPEC_ROUND))]
"TARGET_ROUND"
- "rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "%vrounds<ssemodefsuffix>\t{%2, %1, %d0|%d0, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn "rintxf2"
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
;; are undefined in this condition, we're certain this is correct.
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (smaxmin:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "%x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "v<maxminfprefix>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<code><mode>3"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(smaxmin:MODEF
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
+(define_insn "*avx_ieee_smin<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "vmins<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*ieee_smin<mode>3"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(unspec:MODEF
[(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
+(define_insn "*avx_ieee_smax<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "vmaxs<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*ieee_smax<mode>3"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(unspec:MODEF
int i;
operands[0] = gen_rtx_MEM (Pmode,
gen_rtx_PLUS (Pmode, operands[0], operands[4]));
+ /* VEX instruction with a REX prefix will #UD. */
+ if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
+ gcc_unreachable ();
+
output_asm_insn ("jmp\t%A1", operands);
for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
{
PUT_MODE (operands[4], TImode);
if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
output_asm_insn ("rex", operands);
- output_asm_insn ("movaps\t{%5, %4|%4, %5}", operands);
+ output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
}
(*targetm.asm_out.internal_label) (asm_out_file, "L",
CODE_LABEL_NUMBER (operands[3]));
[(set_attr "type" "other")
(set_attr "length_immediate" "0")
(set_attr "length_address" "0")
- (set_attr "length" "34")
+ (set (attr "length")
+ (if_then_else
+ (eq (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "34")
+ (const_string "42")))
(set_attr "memory" "store")
(set_attr "modrm" "0")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
(define_expand "prefetch"
Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists Save
Do not support SSE4.1 and SSE4.2 built-in functions and code generation
+mavx
+Target Report Mask(ISA_AVX) Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 and AVX built-in functions and code generation
+
+mfma
+Target Report Mask(ISA_FMA) Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation
+
msse4a
Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists Save
Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
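The new -mavx and -mfma entries above enable the AVX and FMA ISA bits in
ix86_isa_flags in the same way as the existing SSE options.  A minimal
usage sketch (file and function names are illustrative; the exact
instructions chosen depend on optimization level and tuning):

    /* add8.c -- compile with:  gcc -O2 -mavx -S add8.c
       With -mavx the backend may use VEX-encoded (and, where
       profitable, 256-bit) instructions for this loop; -mfma
       additionally enables the FMA built-in functions.  */
    void
    add8 (float *__restrict dst, const float *__restrict a,
          const float *__restrict b)
    {
      int i;
      for (i = 0; i < 8; i++)
        dst[i] = a[i] + b[i];
    }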
movq\t{%1, %0|%0, %1}
movdq2q\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}
- pxor\t%0, %0
- movq\t{%1, %0|%0, %1}
- movq\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}
- movd\t{%1, %0|%0, %1}"
+ %vpxor\t%0, %d0
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}"
[(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
(set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "7,8,9,10,11")
+ (const_string "maybe_vex")
+ (const_string "orig")))
(set_attr "mode" "DI")])
+(define_insn "*mov<mode>_internal_avx"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+ "=!?y,!?y,m ,!y ,*Y2,*Y2,*Y2 ,m ,r ,m")
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
+ "C ,!ym,!?y,*Y2,!y ,C ,*Y2m,*Y2,irm,r"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpxor\t%0, %0, %0
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,DI,DI")])
+
(define_insn "*mov<mode>_internal"
[(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
"=!?y,!?y,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,r ,m")
DONE;
})
+(define_insn "*movv2sf_internal_rex64_avx"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=rm,r ,!?y,!?y ,m ,!y,Y2,x,x,x,m,r,x")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "Cr ,m ,C ,!?ym,!y,Y2,!y,C,x,m,x,x,r"))]
+ "TARGET_64BIT && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ mov{q}\t{%1, %0|%0, %1}
+ mov{q}\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vxorps\t%0, %0, %0
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}
+ vmovlps\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "7,8,9,10,11,12")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
(define_insn "*movv2sf_internal_rex64"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
"=rm,r ,!?y,!?y ,m ,!y,*Y2,x,x,x,m,r,Yi")
(set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
(set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+(define_insn "*movv2sf_internal_avx"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=!?y,!?y ,m ,!y ,*Y2,*x,*x,*x,m ,r ,m")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "C ,!?ym,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vxorps\t%0, %0, %0
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}
+ vmovlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7,8")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
(define_insn "*movv2sf_internal"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
"=!?y,!?y ,m ,!y ,*Y2,*x,*x,*x,m ,r ,m")
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 2, 3)")))
+;; Match 4 to 5.
+(define_predicate "const_4_to_5_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 4, 5)")))
+
;; Match 4 to 7.
(define_predicate "const_4_to_7_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 4, 7)")))
+;; Match 6 to 7.
+(define_predicate "const_6_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 6, 7)")))
+
+;; Match 8 to 11.
+(define_predicate "const_8_to_11_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 8, 11)")))
+
+;; Match 12 to 15.
+(define_predicate "const_12_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 12, 15)")))
+
;; Match exactly one bit in 2-bit mask.
(define_predicate "const_pow2_1_to_2_operand"
(and (match_code "const_int")
(define_special_predicate "sse_comparison_operator"
(match_code "eq,lt,le,unordered,ne,unge,ungt,ordered"))
+;; Return 1 if OP is a comparison operator that can be issued by
+;; avx predicate generation instructions
+(define_predicate "avx_comparison_float_operator"
+ (match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt"))
+
;; Return 1 if OP is a comparison operator that can be issued by sse predicate
;; generation instructions
(define_predicate "sse5_comparison_float_operator"
(define_predicate "misaligned_operand"
(and (match_code "mem")
(match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)")))
+
+;; Return 1 if OP is a vzeroall operation, known to be a PARALLEL.
+(define_predicate "vzeroall_operation"
+ (match_code "parallel")
+{
+ int nregs = TARGET_64BIT ? 16 : 8;
+
+ if (XVECLEN (op, 0) != nregs + 1)
+ return 0;
+
+ return 1;
+})
;; All 16-byte vector modes handled by SSE
(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+;; All 32-byte integral vector modes handled by AVX
+(define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
+
+;; All 32-byte vector modes handled by AVX
+(define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
+
+;; All QI vector modes handled by AVX
+(define_mode_iterator AVXMODEQI [V32QI V16QI])
+
+;; All vector modes handled by AVX
+(define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
+
;; Mix-n-match
(define_mode_iterator SSEMODE12 [V16QI V8HI])
(define_mode_iterator SSEMODE24 [V8HI V4SI])
(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
(define_mode_iterator SSEMODEF2P [V4SF V2DF])
+(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
+(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
+(define_mode_iterator AVX256MODE4P [V4DI V4DF])
+(define_mode_iterator AVX256MODE8P [V8SI V8SF])
+(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
+(define_mode_iterator AVXMODEF4P [V4SF V4DF])
+(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
+(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
+
;; Int-float size matches
(define_mode_iterator SSEMODE4S [V4SF V4SI])
(define_mode_iterator SSEMODE2D [V2DF V2DI])
(V16QI "16") (V8HI "8")
(V4SI "4") (V2DI "2")])
+;; Mapping for AVX
+(define_mode_attr avxvecmode
+ [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
+ (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
+ (V8SF "V8SF") (V4DF "V4DF")])
+(define_mode_attr avxvecpsmode
+ [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
+ (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
+(define_mode_attr avxhalfvecmode
+ [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
+ (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
+(define_mode_attr avxscalarmode
+ [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
+ (V8SF "SF") (V4DF "DF")])
+(define_mode_attr avxcvtvecmode
+ [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
+(define_mode_attr avxpermvecmode
+ [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
+(define_mode_attr avxmodesuffixf2c
+ [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
+(define_mode_attr avxmodesuffixp
+ [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
+ (V4DF "pd")])
+(define_mode_attr avxmodesuffixs
+ [(V16QI "b") (V8HI "w") (V4SI "d")])
+(define_mode_attr avxmodesuffix
+ [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
+ (V8SI "256") (V8SF "256") (V4DF "256")])
+
;; Mapping of immediate bits for blend instructions
-(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
+(define_mode_attr blendbits
+ [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
+
+;; Mapping of immediate bits for vpermil instructions
+(define_mode_attr vpermilbits
+ [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
+
+;; Mapping of immediate bits for pinsr instructions
+(define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_expand "mov<mode>"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "*avx_mov<mode>_internal"
+ [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ "TARGET_AVX
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ case MODE_V4SF:
+ return "vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V4DF:
+ case MODE_V2DF:
+ return "vmovapd\t{%1, %0|%0, %1}";
+ default:
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
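A sketch of a 256-bit move that the mov<mode> expander and
*avx_mov<mode>_internal pattern above are intended to handle, written
with GCC's generic vector extension so that no intrinsics header is
needed; the typedef and function name are illustrative:

    /* With -mavx, copying a 32-byte vector object is expected to go
       through the V8SF move pattern above and come out as vmovaps
       (the pointed-to objects are naturally 32-byte aligned here).  */
    typedef float v8sf __attribute__ ((vector_size (32)));

    void
    copy_v8sf (v8sf *dst, const v8sf *src)
    {
      *dst = *src;
    }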
;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.
})
(define_expand "push<mode>1"
+ [(match_operand:AVX256MODE 0 "register_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "push<mode>1"
[(match_operand:SSEMODE 0 "register_operand" "")]
"TARGET_SSE"
{
})
(define_expand "movmisalign<mode>"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_move_misalign (<MODE>mode, operands);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
[(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
(match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
DONE;
})
+(define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "sse2_movq128"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_concat:V2DI
(parallel [(const_int 0)]))
(const_int 0)))]
"TARGET_SSE2"
- "movq\t{%1, %0|%0, %1}"
+ "%vmovq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "<sse>_movup<ssemodesuffixf2c>"
[(set_attr "type" "ssemov")
(set_attr "mode" "<MODE>")])
+(define_insn "avx_movdqu<avxmodesuffix>"
+ [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
+ (unspec:AVXMODEQI
+ [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "vmovdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "sse2_movdqu"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
[(match_operand:SSEMODEF2P 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
- "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ "%vmovntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn "sse2_movntv2di"
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
- "movntdq\t{%1, %0|%0, %1}"
+ "%vmovntdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse2_movntsi"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
+(define_insn "avx_lddqu<avxmodesuffix>"
+ [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
+ (unspec:AVXMODEQI
+ [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
+ UNSPEC_LDDQU))]
+ "TARGET_AVX"
+ "vlddqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "sse3_lddqu"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
"ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (plusminus:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (plusminus:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
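A sketch of the kind of code the 256-bit <plusminus_insn> expander and
*avx_<plusminus_insn><mode>3 pattern above are meant to catch, again
using the generic vector extension; the typedef and function name are
illustrative:

    /* With -mavx, this addition on a 32-byte float vector is expected
       to expand through addv8sf3 above and assemble to vaddps on ymm
       registers.  */
    typedef float v8sf __attribute__ ((vector_size (32)));

    v8sf
    add_v8sf (v8sf a, v8sf b)
    {
      return a + b;
    }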
+(define_expand "<plusminus_insn><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(plusminus:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
[(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
+(define_insn "*avx_vm<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_insn "<sse>_vm<plusminus_insn><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(set_attr "mode" "<ssescalarmode>")])
(define_expand "mul<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (mult:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
+
+(define_insn "*avx_mul<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (mult:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_expand "mul<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(mult:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
[(set_attr "type" "ssemul")
(set_attr "mode" "<MODE>")])
+(define_insn "*avx_vmmul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_insn "<sse>_vmmul<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
[(set_attr "type" "ssemul")
(set_attr "mode" "<ssescalarmode>")])
+(define_expand "divv8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (div:V8SF (match_operand:V8SF 1 "register_operand" "")
+ (match_operand:V8SF 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+{
+ ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
+
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], V8SFmode);
+ DONE;
+ }
+})
+
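The divv8sf3 expander above only takes the ix86_emit_swdivsf
(reciprocal-estimate) path when the reciprocal and unsafe-math
conditions it tests are all satisfied.  A hedged sketch of source and
flags under which that path is expected to trigger; the file and
function names are illustrative:

    /* div8.c -- compile with:  gcc -O2 -mavx -mrecip -ffast-math -S div8.c
       Under these flags the conditions in divv8sf3 above hold, so the
       division may be emitted as a vrcpps estimate refined by a
       Newton-Raphson step instead of a plain vdivps.  */
    typedef float v8sf __attribute__ ((vector_size (32)));

    v8sf
    div_v8sf (v8sf a, v8sf b)
    {
      return a / b;
    }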
+(define_expand "divv4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (div:V4DF (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+ "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
+
+(define_insn "avx_div<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (div:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_expand "divv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "")
(div:V4SF (match_operand:V4SF 1 "register_operand" "")
"TARGET_SSE2"
"")
+(define_insn "*avx_div<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_div<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(div:SSEMODEF2P
[(set_attr "type" "ssediv")
(set_attr "mode" "<MODE>")])
+(define_insn "*avx_vmdiv<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_insn "<sse>_vmdiv<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
[(set_attr "type" "ssediv")
(set_attr "mode" "<ssescalarmode>")])
+(define_insn "avx_rcpv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ "TARGET_AVX"
+ "vrcpps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "sse_rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
"TARGET_SSE"
- "rcpps\t{%1, %0|%0, %1}"
+ "%vrcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
+(define_insn "*avx_vmrcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP)
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrcpss\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse_vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
+(define_expand "sqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
+ DONE;
+ }
+})
+
+(define_insn "avx_sqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_expand "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "")
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
[(set (match_operand:V4SF 0 "register_operand" "=x")
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
- "sqrtps\t{%1, %0|%0, %1}"
+ "%vsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
+(define_insn "sqrtv4df2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vsqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sqrtv2df2"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
- "sqrtpd\t{%1, %0|%0, %1}"
+ "%vsqrtpd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")])
+(define_insn "*avx_vmsqrt<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (sqrt:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+ (match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (const_int 1)))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_insn "<sse>_vmsqrt<mode>2"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
[(set_attr "type" "sse")
(set_attr "mode" "<ssescalarmode>")])
+(define_expand "rsqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
+ "TARGET_AVX && TARGET_SSE_MATH"
+{
+ ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
+ DONE;
+})
+
+(define_insn "avx_rsqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+ "TARGET_AVX"
+ "vrsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_expand "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "")
(unspec:V4SF
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
"TARGET_SSE"
- "rsqrtps\t{%1, %0|%0, %1}"
+ "%vrsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
+(define_insn "*avx_vmrsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
(define_expand "<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (smaxmin:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+})
+
+(define_expand "<code><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(smaxmin:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
+(define_insn "*avx_<code><mode>3_finite"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (smaxmin:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*<code><mode>3_finite"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(smaxmin:SSEMODEF2P
[(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (smaxmin:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "*<code><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(smaxmin:SSEMODEF2P
[(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
+(define_insn "*avx_vm<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_insn "<sse>_vm<code><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
+(define_insn "*avx_ieee_smin<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "*avx_ieee_smax<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "*ieee_smin<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(unspec:SSEMODEF2P
[(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
+(define_insn "avx_addsubv8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_merge:V8SF
+ (plus:V8SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (minus:V8SF (match_dup 1) (match_dup 2))
+ (const_int 85)))]
+ "TARGET_AVX"
+ "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_addsubv4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_merge:V4DF
+ (plus:V4DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (minus:V4DF (match_dup 1) (match_dup 2))
+ (const_int 5)))]
+ "TARGET_AVX"
+ "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "*avx_addsubv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (plus:V4SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (minus:V4SF (match_dup 1) (match_dup 2))
+ (const_int 5)))]
+ "TARGET_AVX"
+ "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
+(define_insn "*avx_addsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (minus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
(define_insn "sse3_addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
+(define_insn "avx_h<plusminus_insn>v4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_concat:V4DF
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "avx_h<plusminus_insn>v8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_concat:V8SF
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_h<plusminus_insn>v4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse3_h<plusminus_insn>v4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_concat:V4SF
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
+(define_insn "*avx_h<plusminus_insn>v2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
(define_insn "sse3_h<plusminus_insn>v2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_concat:V2DF
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "<sse>_maskcmp<mode>3"
+(define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP))]
+ "TARGET_AVX"
+ "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; We don't promote 128bit vector compare intrinsics, but the vectorizer
+;; may generate 256bit vector compare instructions.
+(define_insn "*avx_maskcmp<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
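A sketch of scalar source that the vectorizer may turn into the 256-bit
mask compares handled by *avx_maskcmp<mode>3 above (whether and how the
loop is vectorized depends on the vectorizer settings and tuning); the
function name is illustrative:

    /* If this loop is vectorized with 256-bit vectors under -mavx, the
       comparison may become a vcmpps mask feeding a blend or and/andn
       sequence, matching *avx_maskcmp<mode>3 above.  */
    void
    clamp_negative (float *dst, const float *src, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        dst[i] = src[i] < 0.0f ? 0.0f : src[i];
    }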
+(define_insn "<sse>_maskcmp<mode>3"
[(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
(match_operator:SSEMODEF4 3 "sse_comparison_operator"
[(match_operand:SSEMODEF4 1 "register_operand" "0")
(match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
- "comis<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn "<sse>_ucomi"
(match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
- "ucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_expand "vcond<mode>"
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "avx_nand<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (and:AVXMODEF2P
+ (not:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "register_operand" "x"))
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "<sse>_nand<mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(and:SSEMODEF2P
(set_attr "mode" "<MODE>")])
(define_expand "<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (plogic:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (plogic:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_expand "<code><mode>3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(plogic:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
;; allocation lossage. These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
+(define_insn "*avx_nand<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (and:MODEF
+ (not:MODEF
+ (match_operand:MODEF 1 "register_operand" "x"))
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode)"
+ "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssevecmode>")])
+
(define_insn "*nand<mode>3"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(and:MODEF
[(set_attr "type" "sselog")
(set_attr "mode" "<ssevecmode>")])
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (plogic:MODEF
+ (match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode)"
+ "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssevecmode>")])
+
(define_insn "*<code><mode>3"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(plogic:MODEF
(set_attr "unit" "mmx")
(set_attr "mode" "SF")])
+(define_insn "*avx_cvtsi2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse_cvtsi2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
(set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
+(define_insn "*avx_cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX && TARGET_64BIT"
+ "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse_cvtsi2ssq"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE"
- "cvtss2si\t{%1, %0|%0, %1}"
+ "%vcvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_insn "sse_cvtss2si_2"
(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE"
- "cvtss2si\t{%1, %0|%0, %1}"
+ "%vcvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_insn "sse_cvtss2siq"
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE && TARGET_64BIT"
- "cvtss2siq\t{%1, %0|%0, %1}"
+ "%vcvtss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
(define_insn "sse_cvtss2siq_2"
(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE && TARGET_64BIT"
- "cvtss2siq\t{%1, %0|%0, %1}"
+ "%vcvtss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
(define_insn "sse_cvttss2si"
(match_operand:V4SF 1 "nonimmediate_operand" "x,m")
(parallel [(const_int 0)]))))]
"TARGET_SSE"
- "cvttss2si\t{%1, %0|%0, %1}"
+ "%vcvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_insn "sse_cvttss2siq"
(match_operand:V4SF 1 "nonimmediate_operand" "x,m")
(parallel [(const_int 0)]))))]
"TARGET_SSE && TARGET_64BIT"
- "cvttss2siq\t{%1, %0|%0, %1}"
+ "%vcvttss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
+(define_insn "avx_cvtdq2ps<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
+ (float:AVXMODEDCVTDQ2PS
+ (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "sse2_cvtdq2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
+(define_insn "avx_cvtps2dq<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
+ (unspec:AVXMODEDCVTPS2DQ
+ [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX"
+ "vcvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "sse2_cvtps2dq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "avx_cvttps2dq<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
+ (fix:AVXMODEDCVTPS2DQ
+ (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "sse2_cvttps2dq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_cvtsi2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
(define_insn "sse2_cvtsi2sd"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
(set_attr "athlon_decode" "double,direct")
(set_attr "amdfam10_decode" "vector,double")])
+(define_insn "*avx_cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX && TARGET_64BIT"
+ "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
(define_insn "sse2_cvtsi2sdq"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2"
- "cvtsd2si\t{%1, %0|%0, %1}"
+ "%vcvtsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_insn "sse2_cvtsd2si_2"
(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2"
- "cvtsd2si\t{%1, %0|%0, %1}"
+ "%vcvtsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_insn "sse2_cvtsd2siq"
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2 && TARGET_64BIT"
- "cvtsd2siq\t{%1, %0|%0, %1}"
+ "%vcvtsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
(define_insn "sse2_cvtsd2siq_2"
(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2 && TARGET_64BIT"
- "cvtsd2siq\t{%1, %0|%0, %1}"
+ "%vcvtsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
(define_insn "sse2_cvttsd2si"
(match_operand:V2DF 1 "nonimmediate_operand" "x,m")
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
- "cvttsd2si\t{%1, %0|%0, %1}"
+ "%vcvttsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")])
(match_operand:V2DF 1 "nonimmediate_operand" "x,m")
(parallel [(const_int 0)]))))]
"TARGET_SSE2 && TARGET_64BIT"
- "cvttsd2siq\t{%1, %0|%0, %1}"
+ "%vcvttsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")])
+(define_insn "avx_cvtdq2pd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sse2_cvtdq2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(float:V2DF
(match_operand:V4SI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE2"
- "cvtdq2pd\t{%1, %0|%0, %1}"
+ "%vcvtdq2pd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")])
+(define_insn "avx_cvtpd2dq256"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX"
+ "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_expand "sse2_cvtpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "")
(vec_concat:V4SI
UNSPEC_FIX_NOTRUNC)
(match_operand:V2SI 2 "const0_operand" "")))]
"TARGET_SSE2"
- "cvtpd2dq\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
+ : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")
(set_attr "amdfam10_decode" "double")])
+(define_insn "avx_cvttpd2dq256"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_expand "sse2_cvttpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "")
(vec_concat:V4SI
(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
(match_operand:V2SI 2 "const0_operand" "")))]
"TARGET_SSE2"
- "cvttpd2dq\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
+ : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")
(set_attr "amdfam10_decode" "double")])
+(define_insn "*avx_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse2_cvtsd2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
(set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
+(define_insn "*avx_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)])))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
(define_insn "sse2_cvtss2sd"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
(set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "DF")])
+(define_insn "avx_cvtpd2ps256"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float_truncate:V4SF
+ (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_expand "sse2_cvtpd2ps"
[(set (match_operand:V4SF 0 "register_operand" "")
(vec_concat:V4SF
(match_operand:V2DF 1 "nonimmediate_operand" "xm"))
(match_operand:V2SF 2 "const0_operand" "")))]
"TARGET_SSE2"
- "cvtpd2ps\t{%1, %0|%0, %1}"
+ "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
+ : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
[(set_attr "type" "ssecvt")
(set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")
(set_attr "amdfam10_decode" "double")])
+(define_insn "avx_cvtps2pd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float_extend:V4DF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sse2_cvtps2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(float_extend:V2DF
(match_operand:V4SF 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE2"
- "cvtps2pd\t{%1, %0|%0, %1}"
+ "%vcvtps2pd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")
(set_attr "amdfam10_decode" "direct")])
"TARGET_SSE"
"ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+(define_insn "*avx_movhlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovhlps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%H2, %1, %0|%0, %1, %H2}
+ vmovhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
(define_insn "sse_movhlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
(vec_select:V4SF
"TARGET_SSE"
"ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+(define_insn "*avx_movlhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+ "@
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
(define_insn "sse_movlhps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
(vec_select:V4SF
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF,V2SF,V2SF")])
+(define_insn "avx_unpckhps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX"
+ "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_unpckhps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse_unpckhps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
+(define_insn "avx_unpcklps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))]
+ "TARGET_AVX"
+ "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_unpcklps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_AVX"
+ "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse_unpcklps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
+(define_insn "avx_movshdup256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 1) (const_int 1)
+ (const_int 3) (const_int 3)
+ (const_int 5) (const_int 5)
+ (const_int 7) (const_int 7)])))]
+ "TARGET_AVX"
+ "vmovshdup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "sse3_movshdup"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
(const_int 7)
(const_int 7)])))]
"TARGET_SSE3"
- "movshdup\t{%1, %0|%0, %1}"
+ "%vmovshdup\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
+(define_insn "avx_movsldup256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0) (const_int 0)
+ (const_int 2) (const_int 2)
+ (const_int 4) (const_int 4)
+ (const_int 6) (const_int 6)])))]
+ "TARGET_AVX"
+ "vmovsldup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "sse3_movsldup"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
(const_int 6)
(const_int 6)])))]
"TARGET_SSE3"
- "movsldup\t{%1, %0|%0, %1}"
+ "%vmovsldup\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
+(define_expand "avx_shufps256"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")
+ (match_operand:V8SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12)));
+ DONE;
+})
+
+;; One bit in mask selects 2 elements.
+(define_insn "avx_shufps256_1"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_8_to_11_operand" "")
+ (match_operand 6 "const_8_to_11_operand" "")
+ (match_operand 7 "const_4_to_7_operand" "")
+ (match_operand 8 "const_4_to_7_operand" "")
+ (match_operand 9 "const_12_to_15_operand" "")
+ (match_operand 10 "const_12_to_15_operand" "")])))]
+ "TARGET_AVX
+ && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
+ && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
+ && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
+ && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 8) << 4;
+ mask |= (INTVAL (operands[6]) - 8) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
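For illustration, a standalone C sketch of the avx_shufps256 mask handling above: the expander splits the 8-bit selector into indices of the 16-element vec_concat (elements 0-7 name operand 1, 8-15 name operand 2, and the high lane repeats the low-lane selection), while avx_shufps256_1 rebuilds the immediate when the instruction is printed. The mask value 0x1b is an assumed example.

    /* Sketch of the avx_shufps256 selector arithmetic; 0x1b is an example.  */
    #include <stdio.h>

    int main (void)
    {
      int mask = 0x1b;
      int sel[8];

      /* Decomposition performed by the expander.  */
      sel[0] = (mask >> 0) & 3;
      sel[1] = (mask >> 2) & 3;
      sel[2] = ((mask >> 4) & 3) + 8;
      sel[3] = ((mask >> 6) & 3) + 8;
      sel[4] = ((mask >> 0) & 3) + 4;        /* high lane: same selection, +4 */
      sel[5] = ((mask >> 2) & 3) + 4;
      sel[6] = ((mask >> 4) & 3) + 12;
      sel[7] = ((mask >> 6) & 3) + 12;

      /* Recombination performed by avx_shufps256_1.  */
      int imm = sel[0] | (sel[1] << 2) | ((sel[2] - 8) << 4) | ((sel[3] - 8) << 6);

      printf ("selectors:");
      for (int i = 0; i < 8; i++)
        printf (" %d", sel[i]);
      printf ("\nimmediate: 0x%02x\n", imm);   /* round-trips to 0x1b */
      return 0;
    }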
(define_expand "sse_shufps"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")
DONE;
})
+(define_insn "*avx_shufps_<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "x")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (match_operand 6 "const_4_to_7_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse_shufps_<mode>"
[(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
(vec_select:SSEMODE4S
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_SSE"
"@
- movhps\t{%1, %0|%0, %1}
- movhlps\t{%1, %0|%0, %1}
- movlps\t{%H1, %0|%0, %H1}"
+ %vmovhps\t{%1, %0|%0, %1}
+ %vmovhlps\t{%1, %d0|%d0, %1}
+ %vmovlps\t{%H1, %d0|%d0, %H1}"
[(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2SF,V4SF,V2SF")])
(define_expand "sse_loadhps_exp"
"TARGET_SSE"
"ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+(define_insn "*avx_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_AVX"
+ "@
+ vmovhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
(define_insn "sse_loadhps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
(vec_concat:V4SF
[(set_attr "type" "ssemov")
(set_attr "mode" "V2SF,V4SF,V2SF")])
+(define_insn "*avx_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_AVX"
+ "@
+ vmovlps\t{%1, %0|%0, %1}
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,V2DF,V2SF")])
+
(define_insn "sse_storelps"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
(vec_select:V2SF
"TARGET_SSE"
"ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+(define_insn "*avx_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "@
+ vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
+ vmovlps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
(define_insn "sse_loadlps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
(vec_concat:V4SF
[(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "V4SF,V2SF,V2SF")])
+(define_insn "*avx_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vmovss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse_movss"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
[(set_attr "type" "ssemov")
(set_attr "mode" "SF")])
+(define_insn "*vec_dupv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "register_operand" "x")))]
+ "TARGET_AVX"
+ "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "*vec_dupv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_duplicate:V4SF
[(set_attr "type" "sselog1")
(set_attr "mode" "V4SF")])
+(define_insn "*vec_concatv2sf_avx"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
+ (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
+ "TARGET_AVX"
+ "@
+ vunpcklps\t{%2, %1, %0|%0, %1, %2}
+ vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
+ vmovss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3,4")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
+
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
[(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "V4SF,SF,DI,DI")])
+(define_insn "*vec_concatv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " x,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_AVX"
+ "@
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF")])
+
(define_insn "*vec_concatv4sf_sse"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_concat:V4SF
DONE;
})
-(define_insn "vec_setv4sf_0"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
+(define_insn "*vec_setv4sf_0_avx"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
(vec_merge:V4SF
(vec_duplicate:V4SF
(match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
- (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+ (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
(const_int 1)))]
- "TARGET_SSE"
+ "TARGET_AVX"
"@
- movss\t{%2, %0|%0, %2}
- movss\t{%2, %0|%0, %2}
+ vmovss\t{%2, %1, %0|%0, %1, %2}
+ vmovss\t{%2, %0|%0, %2}
+ vmovd\t{%2, %0|%0, %2}
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "vec_setv4sf_0"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
+ (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
movd\t{%2, %0|%0, %2}
#"
[(set_attr "type" "ssemov")
(set_attr "mode" "SF")])
;; A subset is vec_setv4sf.
+(define_insn "*vec_setv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
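A note on the immediate conversion in *vec_setv4sf_avx above: the vec_merge mask is a single power of two, and exact_log2 of it shifted left by 4 places the destination element index into bits 5:4 of the vinsertps immediate. A tiny standalone C sketch of that mapping (the iterated masks are example values, and exact_log2 here is a stand-in valid only for powers of two):

    /* Sketch: map a power-of-two vec_merge mask to the vinsertps immediate.  */
    #include <stdio.h>

    static int exact_log2 (int x)      /* stand-in; assumes x is a power of two */
    {
      int n = 0;
      while (x > 1)
        {
          x >>= 1;
          n++;
        }
      return n;
    }

    int main (void)
    {
      for (int mask = 1; mask <= 8; mask <<= 1)
        printf ("merge mask %d -> vinsertps immediate 0x%02x\n",
                mask, exact_log2 (mask) << 4);
      return 0;
    }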
(define_insn "*vec_setv4sf_sse4_1"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(set_attr "prefix_extra" "1")
(set_attr "mode" "V4SF")])
+(define_insn "*avx_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_AVX"
+ "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse4_1_insertps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
DONE;
})
+(define_expand "avx_vextractf128<mode>"
+ [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "register_operand" "")
+ (match_operand:SI 2 "const_0_to_1_operand" "")]
+ "TARGET_AVX"
+{
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
+ break;
+ case 1:
+ emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
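The expander above only dispatches on the immediate: selector 0 names the low 128-bit lane of the 256-bit source and selector 1 the high lane, which the vec_extract_lo_* and vec_extract_hi_* patterns below encode as vextractf128 with immediate 0x0 or 0x1. A plain C model of that lane selection for an eight-float value; the data and the selector are assumed examples:

    /* Model: vextractf128 with selector 0 returns elements 0-3, selector 1
       returns elements 4-7 of a 256-bit (eight-float) source.  */
    #include <stdio.h>

    int main (void)
    {
      float ymm[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };   /* stands for a V8SF value */
      int sel = 1;                                  /* example immediate */
      float xmm[4];

      for (int i = 0; i < 4; i++)
        xmm[i] = ymm[sel * 4 + i];

      for (int i = 0; i < 4; i++)
        printf ("%g ", xmm[i]);
      printf ("\n");                                /* prints "4 5 6 7" */
      return 0;
    }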
+(define_insn "vec_extract_lo_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_lo_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_lo_v16hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_v16hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x,x")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_lo_v32qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_v32qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x,x")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
(define_insn "*sse4_1_extractps"
[(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
(vec_select:SF
(match_operand:V4SF 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
"TARGET_SSE4_1"
- "extractps\t{%2, %1, %0|%0, %1, %2}"
+ "%vextractps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
(define_insn_and_split "*vec_extract_v4sf_mem"
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "avx_unpckhpd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_expand "sse2_unpckhpd_exp"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "")
(vec_select:V2DF
"TARGET_SSE2"
"ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+(define_insn "*avx_unpckhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vunpckhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
+ vmovhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
(define_insn "sse2_unpckhpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
(vec_select:V2DF
[(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "V2DF,V1DF,V1DF")])
+(define_insn "avx_movddup256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_AVX"
+ "vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
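For reference, a plain C model of what the 256-bit vmovddup emitted above does: each 128-bit lane duplicates its even-indexed double, so the result is { src[0], src[0], src[2], src[2] }. The input values are an assumed example.

    /* Model of 256-bit vmovddup on a four-double value.  */
    #include <stdio.h>

    int main (void)
    {
      double src[4] = { 1.0, 2.0, 3.0, 4.0 };
      double dst[4];

      dst[0] = src[0];      /* low lane:  { src[0], src[0] } */
      dst[1] = src[0];
      dst[2] = src[2];      /* high lane: { src[2], src[2] } */
      dst[3] = src[2];

      printf ("%g %g %g %g\n", dst[0], dst[1], dst[2], dst[3]);
      return 0;
    }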
+(define_insn "*avx_movddup"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovddup\t{%1, %0|%0, %1}
+ #"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
(define_insn "*sse3_movddup"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
(vec_select:V2DF
DONE;
})
+(define_insn "avx_unpcklpd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_AVX"
+ "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_expand "sse2_unpcklpd_exp"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "")
(vec_select:V2DF
"TARGET_SSE2"
"ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+(define_insn "*avx_unpcklpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
(define_insn "sse2_unpcklpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
(vec_select:V2DF
[(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "V2DF,V1DF,V1DF")])
+(define_expand "avx_shufpd256"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 5 : 4),
+ GEN_INT (mask & 4 ? 3 : 2),
+ GEN_INT (mask & 8 ? 7 : 6)));
+ DONE;
+})
+
+(define_insn "avx_shufpd256_1"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_4_to_5_operand" "")
+ (match_operand 5 "const_2_to_3_operand" "")
+ (match_operand 6 "const_6_to_7_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 4) << 1;
+ mask |= (INTVAL (operands[5]) - 2) << 2;
+ mask |= (INTVAL (operands[6]) - 6) << 3;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
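As in the single-precision case, a standalone C sketch of the avx_shufpd256 mask handling above: the expander maps each of the four selector bits to an element index of the 8-element vec_concat (operand 2's elements are numbered 4-7), and avx_shufpd256_1 rebuilds the immediate. The mask value 0x5 is an assumed example.

    /* Sketch of the avx_shufpd256 selector arithmetic; 0x5 is an example.  */
    #include <stdio.h>

    int main (void)
    {
      int mask = 0x5;
      int sel[4];

      sel[0] = mask & 1;              /* low lane, from operand 1 (0 or 1)  */
      sel[1] = (mask & 2) ? 5 : 4;    /* low lane, from operand 2 (4 or 5)  */
      sel[2] = (mask & 4) ? 3 : 2;    /* high lane, from operand 1 (2 or 3) */
      sel[3] = (mask & 8) ? 7 : 6;    /* high lane, from operand 2 (6 or 7) */

      /* Recombination performed by avx_shufpd256_1.  */
      int imm = sel[0] | ((sel[1] - 4) << 1)
                       | ((sel[2] - 2) << 2) | ((sel[3] - 6) << 3);

      printf ("selectors: %d %d %d %d  immediate: 0x%x\n",
              sel[0], sel[1], sel[2], sel[3], imm);
      return 0;
    }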
(define_expand "sse2_shufpd"
[(match_operand:V2DF 0 "register_operand" "")
(match_operand:V2DF 1 "register_operand" "")
"TARGET_SSE2")
;; punpcklqdq and punpckhqdq are shorter than shufpd.
+(define_insn "*avx_punpckhqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_AVX"
+ "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_punpckhqdq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_select:V2DI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_punpcklqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_AVX"
+ "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_punpcklqdq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_select:V2DI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_shufpd_<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "x")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_2_to_3_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
(define_insn "sse2_shufpd_<mode>"
[(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
(vec_select:SSEMODE2D
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhpd\t{%1, %0|%0, %1}
+ vunpckhpd\t{%1, %1, %0|%0, %1, %1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
(define_insn "sse2_storehpd"
[(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
(vec_select:DF
(parallel [(const_int 0)])))]
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
- movlpd\t{%1, %0|%0, %1}
+ %vmovlpd\t{%1, %0|%0, %1}
#
#
#
#"
[(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "V1DF,DF,DF,DF,DF")])
(define_split
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
(define_insn "sse2_loadhpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
(vec_concat:V2DF
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovsd\t{%2, %0|%0, %2}
+ vmovlpd\t{%2, %1, %0|%0, %1, %2}
+ vmovsd\t{%2, %1, %0|%0, %1, %2}
+ vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
+
(define_insn "sse2_loadlpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
(vec_concat:V2DF
[(set_attr "type" "ssemov")
(set_attr "mode" "V2SF,V4SF,V2SF")])
+(define_insn "*avx_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "@
+ vmovsd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %0|%0, %2}
+ vmovhps\t{%H1, %2, %0|%0, %2, %H1}
+ vmovhps\t{%1, %H0|%H0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
+
(define_insn "sse2_movsd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
(vec_merge:V2DF
(vec_duplicate:V2DF
(match_operand:DF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE3"
- "movddup\t{%1, %0|%0, %1}"
+ "%vmovddup\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DF")])
(define_insn "vec_dupv2df"
(match_operand:DF 1 "nonimmediate_operand" "xm")
(match_dup 1)))]
"TARGET_SSE3"
- "movddup\t{%1, %0|%0, %1}"
+ "%vmovddup\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "DF")])
+(define_insn "*vec_concatv2df_avx"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
+ (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
+ "TARGET_AVX"
+ "@
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,DF")])
+
(define_insn "*vec_concatv2df"
[(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
(vec_concat:V2DF
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*<plusminus_insn><mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
(plusminus:SSEMODEI
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse2_<plusminus_insn><mode>3"
[(set (match_operand:SSEMODE12 0 "register_operand" "=x")
(sat_plusminus:SSEMODE12
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+(define_insn "*avx_mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmullw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*mulv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+(define_insn "*avxv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*smulv8hi3_highpart"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(truncate:V8HI
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+(define_insn "*avx_umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*umulv8hi3_highpart"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(truncate:V8HI
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+(define_insn "*avx_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse2_umulv2siv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(mult:V2DI
(parallel [(const_int 0) (const_int 2)])))))]
"TARGET_SSE4_1"
"ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
-
-(define_insn "*sse4_1_mulv2siv2di3"
+
+(define_insn "*avx_mulv2siv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(mult:V2DI
(sign_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
- "pmuldq\t{%2, %0|%0, %2}"
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseimul")
- (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "vex")
(set_attr "mode" "TI")])
-(define_expand "sse2_pmaddwd"
- [(set (match_operand:V4SI 0 "register_operand" "")
- (plus:V4SI
- (mult:V4SI
+(define_insn "*sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (plus:V4SI
+ (mult:V4SI
(sign_extend:V4SI
(vec_select:V4HI
(match_operand:V8HI 1 "nonimmediate_operand" "")
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+(define_insn "*avx_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse2_pmaddwd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(plus:V4SI
ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
})
+(define_insn "*avx_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmulld\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse4_1_mulv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
DONE;
})
+(define_insn "*avx_ashr<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (ashiftrt:SSEMODE24
+ (match_operand:SSEMODE24 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ashr<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "=x")
(ashiftrt:SSEMODE24
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_lshr<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (lshiftrt:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "lshr<mode>3"
[(set (match_operand:SSEMODE248 0 "register_operand" "=x")
(lshiftrt:SSEMODE248
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_ashl<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (ashift:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ashl<mode>3"
[(set (match_operand:SSEMODE248 0 "register_operand" "=x")
(ashift:SSEMODE248
operands[1] = gen_lowpart (TImode, operands[1]);
})
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (maxmin:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_expand "<code>v16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(umaxmin:V16QI
"TARGET_SSE2 && !TARGET_SSE5"
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+(define_insn "*avx_eq<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (eq:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse2_eq<mode>3"
[(set (match_operand:SSEMODE124 0 "register_operand" "=x")
(eq:SSEMODE124
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_gt<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (gt:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "register_operand" "x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_gt<mode>3"
[(set (match_operand:SSEMODE124 0 "register_operand" "=x")
(gt:SSEMODE124
operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})
+(define_insn "*avx_nand<mode>3"
+ [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
+ (and:AVX256MODEI
+ (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
+ (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vandnps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecpsmode>")])
+
(define_insn "*sse_nand<mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
(and:SSEMODEI
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
+(define_insn "*avx_nand<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vpandn\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_nand<mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
(and:SSEMODEI
"TARGET_SSE"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
+ (plogic:AVX256MODEI
+ (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
+ (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecpsmode>")])
+
(define_insn "*sse_<code><mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
(plogic:SSEMODEI
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plogic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse2_<code><mode>3"
[(set (match_operand:SSEMODEI 0 "register_operand" "=x")
(plogic:SSEMODEI
(const_int 2)])))]
"TARGET_SSE2")
+(define_insn "*avx_packsswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (ss_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "x"))
+ (ss_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_packsswb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_concat:V16QI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_packssdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (ss_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "x"))
+ (ss_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_packssdw"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_packuswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (us_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "x"))
+ (us_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_packuswb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_concat:V16QI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_punpckhbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_AVX"
+ "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_punpckhbw"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_select:V16QI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_punpcklbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_AVX"
+ "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_punpcklbw"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_select:V16QI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_punpckhwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX"
+ "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_punpckhwd"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_select:V8HI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_punpcklwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_AVX"
+ "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_punpcklwd"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_select:V8HI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_punpckhdq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_punpckhdq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V4SI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_punpckldq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_AVX"
+ "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_punpckldq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V4SI
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_pinsr<avxmodesuffixs>"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (vec_merge:SSEMODE124
+ (vec_duplicate:SSEMODE124
+ (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
+ (match_operand:SSEMODE124 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse4_1_pinsrb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_merge:V16QI
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_pinsrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+ "TARGET_AVX && TARGET_64BIT"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse4_1_pinsrq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_merge:V2DI
(match_operand:V16QI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
"TARGET_SSE4_1"
- "pextrb\t{%2, %1, %0|%0, %1, %2}"
+ "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_pextrb_memory"
(match_operand:V16QI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
"TARGET_SSE4_1"
- "pextrb\t{%2, %1, %0|%0, %1, %2}"
+ "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse2_pextrw"
(match_operand:V8HI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
"TARGET_SSE2"
- "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_pextrw_memory"
(match_operand:V8HI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
"TARGET_SSE4_1"
- "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_pextrd"
(match_operand:V4SI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
"TARGET_SSE4_1"
- "pextrd\t{%2, %1, %0|%0, %1, %2}"
+ "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
;; It must come before *vec_extractv2di_1_sse since it is preferred.
(match_operand:V2DI 1 "register_operand" "x")
(parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
"TARGET_SSE4_1 && TARGET_64BIT"
- "pextrq\t{%2, %1, %0|%0, %1, %2}"
+ "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_expand "sse2_pshufd"
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "pshufd\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
}
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
+ (set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_expand "sse2_pshuflw"
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_expand "sse2_pshufhw"
mask |= (INTVAL (operands[5]) - 4) << 6;
operands[2] = GEN_INT (mask);
- return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_expand "sse2_loadd"
"TARGET_SSE"
"operands[2] = CONST0_RTX (V4SImode);")
+(define_insn "*avx_loadld"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "@
+ vmovd\t{%2, %0|%0, %2}
+ vmovd\t{%2, %0|%0, %2}
+ vmovss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI,TI,V4SF")])
+
(define_insn "sse2_loadld"
[(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
(vec_merge:V4SI
"@
#
#
- mov{q}\t{%1, %0|%0, %1}"
+ %vmov{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "*,*,imov")
+ (set_attr "prefix" "*,*,maybe_vex")
(set_attr "mode" "*,*,DI")])
(define_insn "*sse2_storeq"
operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})
+(define_insn "*vec_extractv2di_1_rex64_avx"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_64BIT
+ && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhps\t{%1, %0|%0, %1}
+ vpsrldq\t{$8, %1, %0|%0, %1, 8}
+ vmovq\t{%H1, %0|%0, %H1}
+ vmov{q}\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft,ssemov,imov")
+ (set_attr "memory" "*,none,*,*")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,TI,TI,DI")])
+
(define_insn "*vec_extractv2di_1_rex64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
(vec_select:DI
(set_attr "memory" "*,none,*,*")
(set_attr "mode" "V2SF,TI,TI,DI")])
+(define_insn "*vec_extractv2di_1_avx"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_64BIT
+ && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhps\t{%1, %0|%0, %1}
+ vpsrldq\t{$8, %1, %0|%0, %1, 8}
+ vmovq\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft,ssemov")
+ (set_attr "memory" "*,none,*")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,TI,TI")])
+
(define_insn "*vec_extractv2di_1_sse2"
[(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
(vec_select:DI
(match_operand:SI 1 "register_operand" " Y2,0")))]
"TARGET_SSE"
"@
- pshufd\t{$0, %1, %0|%0, %1, 0}
+ %vpshufd\t{$0, %1, %0|%0, %1, 0}
shufps\t{$0, %0, %0|%0, %0, 0}"
[(set_attr "type" "sselog1")
+ (set_attr "prefix" "maybe_vex,orig")
(set_attr "mode" "TI,V4SF")])
-(define_insn "*vec_dupv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
+(define_insn "*vec_dupv2di_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_duplicate:V2DI
- (match_operand:DI 1 "register_operand" " 0 ,0")))]
+ (match_operand:DI 1 "register_operand" "x")))]
+ "TARGET_AVX"
+ "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*vec_dupv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "register_operand" " 0 ,0")))]
"TARGET_SSE"
"@
punpcklqdq\t%0, %0
[(set_attr "type" "sselog1,ssemov")
(set_attr "mode" "TI,V4SF")])
+(define_insn "*vec_concatv2si_avx"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
+ (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
+ "TARGET_AVX"
+ "@
+ vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+ vpunpckldq\t{%2, %1, %0|%0, %1, %2}
+ vmovd\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3,4")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,DI,DI")])
+
(define_insn "*vec_concatv2si_sse4_1"
[(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
(vec_concat:V2SI
[(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "V4SF,V4SF,DI,DI")])
+(define_insn "*vec_concatv4si_1_avx"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" " x,x")
+ (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_AVX"
+ "@
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI,V2SF")])
+
(define_insn "*vec_concatv4si_1"
[(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
(vec_concat:V4SI
[(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "TI,V4SF,V2SF")])
+(define_insn "*vec_concatv2di_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
+ (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
+ "!TARGET_64BIT && TARGET_AVX"
+ "@
+ vmovq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "1")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,V2SF")])
+
(define_insn "vec_concatv2di"
[(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
(vec_concat:V2DI
[(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
(set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
+(define_insn "*vec_concatv2di_rex64_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
+ (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
+ "TARGET_64BIT && TARGET_AVX"
+ "@
+ vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
+
(define_insn "*vec_concatv2di_rex64_sse4_1"
[(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
(vec_concat:V2DI
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
+(define_insn "*avx_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (truncate:V16QI
+ (lshiftrt:V16HI
+ (plus:V16HI
+ (plus:V16HI
+ (zero_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V16HI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V16QI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
+ "vpavgb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse2_uavgv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(truncate:V16QI
"TARGET_SSE2"
"ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
+(define_insn "*avx_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (plus:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
+ "vpavgw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*sse2_uavgv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(truncate:V8HI
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
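;; psadbw sums the absolute differences of the unsigned byte elements,
;; producing one 16-bit total per 64-bit half of the destination.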
+(define_insn "*avx_psadbw"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSADBW))]
+ "TARGET_AVX"
+ "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_psadbw"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
+(define_insn "avx_movmskp<avxmodesuffixf2c>256"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_movmskp<ssemodesuffixf2c>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
[(match_operand:SSEMODEF2P 1 "register_operand" "x")]
UNSPEC_MOVMSK))]
"SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
- "movmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn "sse2_pmovmskb"
(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
UNSPEC_MOVMSK))]
"TARGET_SSE2"
- "pmovmskb\t{%1, %0|%0, %1}"
+ "%vpmovmskb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_expand "sse2_maskmovdqu"
UNSPEC_MASKMOV))]
"TARGET_SSE2 && !TARGET_64BIT"
;; @@@ check ordering of operands in intel/nonintel syntax
- "maskmovdqu\t{%2, %1|%1, %2}"
+ "%vmaskmovdqu\t{%2, %1|%1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse2_maskmovdqu_rex64"
UNSPEC_MASKMOV))]
"TARGET_SSE2 && TARGET_64BIT"
;; @@@ check ordering of operands in intel/nonintel syntax
- "maskmovdqu\t{%2, %1|%1, %2}"
+ "%vmaskmovdqu\t{%2, %1|%1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse_ldmxcsr"
[(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
UNSPECV_LDMXCSR)]
"TARGET_SSE"
- "ldmxcsr\t%0"
+ "%vldmxcsr\t%0"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "memory" "load")])
(define_insn "sse_stmxcsr"
[(set (match_operand:SI 0 "memory_operand" "=m")
(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
"TARGET_SSE"
- "stmxcsr\t%0"
+ "%vstmxcsr\t%0"
[(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
(set_attr "memory" "store")])
(define_expand "sse_sfence"
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "*avx_phaddwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphaddw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_phaddwv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_insn "*avx_phadddv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vphaddd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_phadddv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_concat:V4SI
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_insn "*avx_phaddswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_phaddswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_insn "*avx_phsubwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphsubw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_phsubwv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_insn "*avx_phsubdv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vphsubd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_phsubdv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_concat:V4SI
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_insn "*avx_phsubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_phsubswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "DI")])
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_pmaddubsw128"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V16QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8HI
+ (vec_select:V16QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_AVX"
+ "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
(define_insn "ssse3_pmaddubsw128"
[(set (match_operand:V8HI 0 "register_operand" "=x")
"TARGET_SSSE3"
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+(define_insn "*avx_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 14))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "*ssse3_pmulhrswv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(truncate:V8HI
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_insn "*avx_pshufbv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSHUFB))]
+ "TARGET_AVX"
+ "vpshufb\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_pshufbv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_insn "*avx_psign<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (unspec:SSEMODE124
+ [(match_operand:SSEMODE124 1 "register_operand" "x")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSIGN))]
+ "TARGET_AVX"
+ "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_psign<mode>3"
[(set (match_operand:SSEMODE124 0 "register_operand" "=x")
(unspec:SSEMODE124
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_insn "*avx_palignrti"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "x")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "ssse3_palignrti"
[(set (match_operand:TI 0 "register_operand" "=x")
(unspec:TI [(match_operand:TI 1 "register_operand" "0")
[(set (match_operand:SSEMODE124 0 "register_operand" "=x")
(abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
"TARGET_SSSE3"
- "pabs<ssevecsize>\t{%1, %0|%0, %1}";
+ "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "abs<mode>2"
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (vec_merge:AVXMODEF2P
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
+ "TARGET_AVX"
+ "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:AVXMODEF2P 3 "register_operand" "x")]
+ UNSPEC_BLENDV))]
+ "TARGET_AVX"
+ "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "sse4_1_blendp<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(set_attr "prefix_extra" "1")
(set_attr "mode" "<MODE>")])
+(define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_AVX"
+ "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
(define_insn "sse4_1_dpp<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(unspec:SSEMODEF2P
(unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
UNSPEC_MOVNTDQA))]
"TARGET_SSE4_1"
- "movntdqa\t{%1, %0|%0, %1}"
+ "%vmovntdqa\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_mpsadbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_MPSADBW))]
+ "TARGET_AVX"
+ "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_mpsadbw"
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_packusdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (us_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "x"))
+ (us_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse4_1_packusdw"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_concat:V8HI
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_pblendvb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 3 "register_operand" "x")]
+ UNSPEC_BLENDV))]
+ "TARGET_AVX"
+ "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse4_1_pblendvb"
[(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_pblendw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")))]
+ "TARGET_AVX"
+ "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse4_1_pblendw"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_merge:V8HI
(unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
UNSPEC_PHMINPOSUW))]
"TARGET_SSE4_1"
- "phminposuw\t{%1, %0|%0, %1}"
+ "%vphminposuw\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_extendv8qiv8hi2"
(const_int 6)
(const_int 7)]))))]
"TARGET_SSE4_1"
- "pmovsxbw\t{%1, %0|%0, %1}"
+ "%vpmovsxbw\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_extendv8qiv8hi2"
(const_int 6)
(const_int 7)]))))]
"TARGET_SSE4_1"
- "pmovsxbw\t{%1, %0|%0, %1}"
+ "%vpmovsxbw\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_extendv4qiv4si2"
(const_int 2)
(const_int 3)]))))]
"TARGET_SSE4_1"
- "pmovsxbd\t{%1, %0|%0, %1}"
+ "%vpmovsxbd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_extendv4qiv4si2"
(const_int 2)
(const_int 3)]))))]
"TARGET_SSE4_1"
- "pmovsxbd\t{%1, %0|%0, %1}"
+ "%vpmovsxbd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_extendv2qiv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovsxbq\t{%1, %0|%0, %1}"
+ "%vpmovsxbq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_extendv2qiv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovsxbq\t{%1, %0|%0, %1}"
+ "%vpmovsxbq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_extendv4hiv4si2"
(const_int 2)
(const_int 3)]))))]
"TARGET_SSE4_1"
- "pmovsxwd\t{%1, %0|%0, %1}"
+ "%vpmovsxwd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_extendv4hiv4si2"
(const_int 2)
(const_int 3)]))))]
"TARGET_SSE4_1"
- "pmovsxwd\t{%1, %0|%0, %1}"
+ "%vpmovsxwd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_extendv2hiv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovsxwq\t{%1, %0|%0, %1}"
+ "%vpmovsxwq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_extendv2hiv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovsxwq\t{%1, %0|%0, %1}"
+ "%vpmovsxwq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_extendv2siv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovsxdq\t{%1, %0|%0, %1}"
+ "%vpmovsxdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_extendv2siv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovsxdq\t{%1, %0|%0, %1}"
+ "%vpmovsxdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_zero_extendv8qiv8hi2"
(const_int 6)
(const_int 7)]))))]
"TARGET_SSE4_1"
- "pmovzxbw\t{%1, %0|%0, %1}"
+ "%vpmovzxbw\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_zero_extendv8qiv8hi2"
(const_int 6)
(const_int 7)]))))]
"TARGET_SSE4_1"
- "pmovzxbw\t{%1, %0|%0, %1}"
+ "%vpmovzxbw\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_zero_extendv4qiv4si2"
(const_int 2)
(const_int 3)]))))]
"TARGET_SSE4_1"
- "pmovzxbd\t{%1, %0|%0, %1}"
+ "%vpmovzxbd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_zero_extendv4qiv4si2"
(const_int 2)
(const_int 3)]))))]
"TARGET_SSE4_1"
- "pmovzxbd\t{%1, %0|%0, %1}"
+ "%vpmovzxbd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_zero_extendv2qiv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovzxbq\t{%1, %0|%0, %1}"
+ "%vpmovzxbq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_zero_extendv2qiv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovzxbq\t{%1, %0|%0, %1}"
+ "%vpmovzxbq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_zero_extendv4hiv4si2"
(const_int 2)
(const_int 3)]))))]
"TARGET_SSE4_1"
- "pmovzxwd\t{%1, %0|%0, %1}"
+ "%vpmovzxwd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_zero_extendv4hiv4si2"
(const_int 2)
(const_int 3)]))))]
"TARGET_SSE4_1"
- "pmovzxwd\t{%1, %0|%0, %1}"
+ "%vpmovzxwd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_zero_extendv2hiv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovzxwq\t{%1, %0|%0, %1}"
+ "%vpmovzxwq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_zero_extendv2hiv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovzxwq\t{%1, %0|%0, %1}"
+ "%vpmovzxwq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "sse4_1_zero_extendv2siv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovzxdq\t{%1, %0|%0, %1}"
+ "%vpmovzxdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "*sse4_1_zero_extendv2siv2di2"
(parallel [(const_int 0)
(const_int 1)]))))]
"TARGET_SSE4_1"
- "pmovzxdq\t{%1, %0|%0, %1}"
+ "%vpmovzxdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+;; vtestps/vtestpd are very similar to comiss and ucomiss when setting
+;; FLAGS_REG, but they are not really compare instructions.
+(define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
+ UNSPEC_VTESTP))]
+ "TARGET_AVX"
+ "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
;; but it is not really a compare instruction.
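;; A minimal C sketch (editorial, not part of this patch) of how the
;; ptest/vptest flag results are typically consumed through the SSE4.1
;; intrinsics; the helper name is illustrative only:
;;   #include <smmintrin.h>
;;   int all_bits_clear (__m128i val, __m128i mask)
;;   {
;;     return _mm_testz_si128 (val, mask);  /* returns the ZF result of ptest */
;;   }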
+(define_insn "avx_ptest256"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
+ (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PTEST))]
+ "TARGET_AVX"
+ "vptest\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_ptest"
[(set (reg:CC FLAGS_REG)
(unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
UNSPEC_PTEST))]
"TARGET_SSE4_1"
- "ptest\t{%1, %0|%0, %1}"
+ "%vptest\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecomi")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "avx_roundp<avxmodesuffixf2c>256"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_AVX"
+ "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "sse4_1_roundp<ssemodesuffixf2c>"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(unspec:SSEMODEF2P
(match_operand:SI 2 "const_0_to_15_operand" "n")]
UNSPEC_ROUND))]
"TARGET_ROUND"
- "roundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_rounds<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
(define_insn "sse4_1_rounds<ssemodesuffixf2c>"
(match_dup 5)]
UNSPEC_PCMPESTR))]
"TARGET_SSE4_2"
- "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
+ "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "memory" "none,load")
(set_attr "mode" "TI")])
(match_dup 5)]
UNSPEC_PCMPESTR))]
"TARGET_SSE4_2"
- "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
+ "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "memory" "none,load")
(set_attr "mode" "TI")])
(clobber (match_scratch:SI 1 "= X, X,c,c"))]
"TARGET_SSE4_2"
"@
- pcmpestrm\t{%6, %4, %2|%2, %4, %6}
- pcmpestrm\t{%6, %4, %2|%2, %4, %6}
- pcmpestri\t{%6, %4, %2|%2, %4, %6}
- pcmpestri\t{%6, %4, %2|%2, %4, %6}"
+ %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "memory" "none,load,none,load")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn_and_split "sse4_2_pcmpistr"
(match_dup 3)]
UNSPEC_PCMPISTR))]
"TARGET_SSE4_2"
- "pcmpistri\t{%3, %2, %1|%1, %2, %3}"
+ "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "memory" "none,load")
(set_attr "mode" "TI")])
(match_dup 3)]
UNSPEC_PCMPISTR))]
"TARGET_SSE4_2"
- "pcmpistrm\t{%3, %2, %1|%1, %2, %3}"
+ "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "memory" "none,load")
(set_attr "mode" "TI")])
(clobber (match_scratch:SI 1 "= X, X,c,c"))]
"TARGET_SSE4_2"
"@
- pcmpistrm\t{%4, %3, %2|%2, %3, %4}
- pcmpistrm\t{%4, %3, %2|%2, %3, %4}
- pcmpistri\t{%4, %3, %2|%2, %3, %4}
- pcmpistri\t{%4, %3, %2|%2, %3, %4}"
+ %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
(set_attr "memory" "none,load,none,load")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
[(set_attr "type" "ssecmp")
(set_attr "mode" "TI")])
+(define_insn "*avx_aesenc"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENC))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesenc\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "aesenc"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_aesenclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENCLAST))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "aesenclast"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_aesdec"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDEC))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesdec\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "aesdec"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+(define_insn "*avx_aesdeclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDECLAST))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
(define_insn "aesdeclast"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
UNSPEC_AESIMC))]
"TARGET_AES"
- "aesimc\t{%1, %0|%0, %1}"
+ "%vaesimc\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "aeskeygenassist"
(match_operand:SI 2 "const_0_to_255_operand" "n")]
UNSPEC_AESKEYGENASSIST))]
"TARGET_AES"
- "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
+ "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "pclmulqdq"
[(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
+
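+;; vzeroall clears every SSE register.  The expander below builds a
+;; PARALLEL holding the volatile unspec plus one (set reg 0) for each
+;; of the 8 (or, in 64-bit mode, 16) registers.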
+(define_expand "avx_vzeroall"
+ [(match_par_dup 0 [(const_int 0)])]
+ "TARGET_AVX"
+{
+ int nregs = TARGET_64BIT ? 16 : 8;
+ int regno;
+
+ operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
+
+ XVECEXP (operands[0], 0, 0)
+ = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPECV_VZEROALL);
+
+ for (regno = 0; regno < nregs; regno++)
+ XVECEXP (operands[0], 0, regno + 1)
+ = gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
+ CONST0_RTX (V8SImode));
+})
+
+(define_insn "*avx_vzeroall"
+ [(match_parallel 0 "vzeroall_operation"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
+ (set (match_operand 1 "register_operand" "=x")
+ (match_operand 2 "const0_operand" "X"))])]
+ "TARGET_AVX"
+ "vzeroall"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+;; vzeroupper clobbers the upper 128bits of AVX registers.
+(define_insn "avx_vzeroupper"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
+ (clobber (reg:V8SI XMM0_REG))
+ (clobber (reg:V8SI XMM1_REG))
+ (clobber (reg:V8SI XMM2_REG))
+ (clobber (reg:V8SI XMM3_REG))
+ (clobber (reg:V8SI XMM4_REG))
+ (clobber (reg:V8SI XMM5_REG))
+ (clobber (reg:V8SI XMM6_REG))
+ (clobber (reg:V8SI XMM7_REG))]
+ "TARGET_AVX && !TARGET_64BIT"
+ "vzeroupper"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx_vzeroupper_rex64"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
+ (clobber (reg:V8SI XMM0_REG))
+ (clobber (reg:V8SI XMM1_REG))
+ (clobber (reg:V8SI XMM2_REG))
+ (clobber (reg:V8SI XMM3_REG))
+ (clobber (reg:V8SI XMM4_REG))
+ (clobber (reg:V8SI XMM5_REG))
+ (clobber (reg:V8SI XMM6_REG))
+ (clobber (reg:V8SI XMM7_REG))
+ (clobber (reg:V8SI XMM8_REG))
+ (clobber (reg:V8SI XMM9_REG))
+ (clobber (reg:V8SI XMM10_REG))
+ (clobber (reg:V8SI XMM11_REG))
+ (clobber (reg:V8SI XMM12_REG))
+ (clobber (reg:V8SI XMM13_REG))
+ (clobber (reg:V8SI XMM14_REG))
+ (clobber (reg:V8SI XMM15_REG))]
+ "TARGET_AVX && TARGET_64BIT"
+ "vzeroupper"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx_vpermil<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
+ UNSPEC_VPERMIL))]
+ "TARGET_AVX"
+ "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
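+;; Variable-control variant: the per-element selectors come from the
+;; integer vector in operand 2 instead of an immediate.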
+(define_insn "avx_vpermilvar<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
+ UNSPEC_VPERMIL))]
+ "TARGET_AVX"
+ "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
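+;; Two-source variant: result elements are selected from operand 1 or
+;; operand 2 under the vector control in operand 3 and the 2-bit
+;; immediate in operand 4.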
+(define_insn "avx_vpermil2<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x,x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "x,xm")
+ (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm,x")
+ (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
+ UNSPEC_VPERMIL2))]
+ "TARGET_AVX"
+ "vpermil2p<avxmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
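+;; vperm2f128 selects each 128-bit half of the result from the four
+;; 128-bit halves of operands 1 and 2, controlled by the immediate.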
+(define_insn "avx_vperm2f128<mode>3"
+ [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
+ (unspec:AVX256MODE2P
+ [(match_operand:AVX256MODE2P 1 "register_operand" "x")
+ (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_VPERMIL2F128))]
+ "TARGET_AVX"
+ "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
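+;; vbroadcastss/vbroadcastsd: replicate a scalar loaded from memory into
+;; all four elements, expressed as nested vec_concats of the same operand.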
+(define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
+ (vec_concat:AVXMODEF4P
+ (vec_concat:<avxhalfvecmode>
+ (match_operand:<avxscalarmode> 1 "memory_operand" "m")
+ (match_dup 1))
+ (vec_concat:<avxhalfvecmode>
+ (match_dup 1)
+ (match_dup 1))))]
+ "TARGET_AVX"
+ "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxscalarmode>")])
+
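+;; 256-bit vbroadcastss: eight copies of the SF memory operand.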
+(define_insn "avx_vbroadcastss256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_concat:V8SF
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (match_operand:SF 1 "memory_operand" "m")
+ (match_dup 1))
+ (vec_concat:V2SF
+ (match_dup 1)
+ (match_dup 1)))
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (match_dup 1)
+ (match_dup 1))
+ (vec_concat:V2SF
+ (match_dup 1)
+ (match_dup 1)))))]
+ "TARGET_AVX"
+ "vbroadcastss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
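+;; vbroadcastf128: load 128 bits from memory and duplicate them into both
+;; halves of the 256-bit destination.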
+(define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODEF2P
+ (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
+ (match_dup 1)))]
+ "TARGET_AVX"
+ "vbroadcastf128\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
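+;; vinsertf128 expander: dispatch on the immediate lane selector to the
+;; vec_set_lo_*/vec_set_hi_* patterns below.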
+(define_expand "avx_vinsertf128<mode>"
+ [(match_operand:AVX256MODE 0 "register_operand" "")
+ (match_operand:AVX256MODE 1 "register_operand" "")
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_1_operand" "")]
+ "TARGET_AVX"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
+ operands[2]));
+ break;
+ case 1:
+ emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
+ operands[2]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
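+;; Replace the low or high 128-bit half of a 256-bit register with
+;; operand 2, keeping the other half via vec_select.  All of these emit
+;; vinsertf128 with an immediate of 0 or 1.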
+(define_insn "vec_set_lo_<mode>"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_<mode>"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_<mode>"
+ [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE8P
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_<mode>"
+ [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE8P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
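+;; The same operation for the 256-bit integer modes, written out per mode.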
+(define_insn "vec_set_lo_v16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_v32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_concat:V32QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_concat:V32QI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
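+;; Masked load: operand 2 holds the per-element mask and the destination
+;; also appears as an input (match_dup 0) inside the unspec, so the
+;; element-wise semantics stay opaque to the optimizers.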
+(define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
+ (match_operand:AVXMODEF2P 2 "register_operand" "x")
+ (match_dup 0)]
+ UNSPEC_MASKLOAD))]
+ "TARGET_AVX"
+ "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
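+;; Masked store: only the elements selected by the mask in operand 1 are
+;; written; operand 2 supplies the data.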
+(define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "register_operand" "x")
+ (match_dup 0)]
+ UNSPEC_MASKSTORE))]
+ "TARGET_AVX"
+ "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
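+;; 128-bit to 256-bit cast (UNSPEC_CAST): when the source already lives in
+;; the destination register nothing is emitted (length 0); otherwise only
+;; the low 128 bits are copied with a 128-bit vmovaps/vmovapd/vmovdqa.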
+(define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
+ [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
+ (unspec:AVX256MODE2P
+ [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
+ UNSPEC_CAST))]
+ "TARGET_AVX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "";
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ return "vmovaps\t{%1, %x0|%x0, %1}";
+ case MODE_V4DF:
+ return "vmovapd\t{%1, %x0|%x0, %1}";
+ case MODE_OI:
+ return "vmovdqa\t{%1, %x0|%x0, %1}";
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+ gcc_unreachable ();
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+ (const_string "0")
+ (const_string "*")))])
+
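+;; The narrowing direction: extract the low 128 bits of a 256-bit value;
+;; again a no-op when source and destination registers coincide.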
+(define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
+ [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
+ (unspec:<avxhalfvecmode>
+ [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
+ UNSPEC_CAST))]
+ "TARGET_AVX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "";
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ return "vmovaps\t{%x1, %0|%0, %x1}";
+ case MODE_V4DF:
+ return "vmovapd\t{%x1, %0|%0, %x1}";
+ case MODE_OI:
+ return "vmovdqa\t{%x1, %0|%0, %x1}";
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+ gcc_unreachable ();
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+ (const_string "0")
+ (const_string "*")))])
+
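+;; Generic vector-initialization expander for the 256-bit modes; the real
+;; work is done in ix86_expand_vector_init.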
+(define_expand "vec_init<mode>"
+ [(match_operand:AVX256MODE 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
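+;; Concatenate two 128-bit halves into a 256-bit register: vinsertf128 of
+;; operand 2 into the high half of operand 1, or, when operand 2 is zero,
+;; a 128-bit move of operand 1 (a VEX-encoded 128-bit move clears the
+;; upper half).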
+(define_insn "*vec_concat<mode>_avx"
+ [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
+ (vec_concat:AVX256MODE
+ (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
+ (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
+ "TARGET_AVX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ return "vmovaps\t{%1, %x0|%x0, %1}";
+ case MODE_V4DF:
+ return "vmovapd\t{%1, %x0|%x0, %1}";
+ default:
+ return "vmovdqa\t{%1, %x0|%x0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])